From 85c2075631fe3e265a9caaa4420b6debbeec1338 Mon Sep 17 00:00:00 2001 From: Sara Robinson Date: Thu, 12 Feb 2026 08:50:53 -0800 Subject: [PATCH] feat: Remove UnifiedMetric from the Vertex SDK and import from the GenAI SDK PiperOrigin-RevId: 869238994 --- .../replays/test_create_evaluation_run.py | 24 +- vertexai/_genai/_evals_common.py | 8 +- vertexai/_genai/evals.py | 238 +----------- vertexai/_genai/types/__init__.py | 40 +- vertexai/_genai/types/common.py | 343 ++++-------------- 5 files changed, 93 insertions(+), 560 deletions(-) diff --git a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py index cd97ab042c..3d27df2b68 100644 --- a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py +++ b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py @@ -22,24 +22,24 @@ GCS_DEST = "gs://lakeyk-limited-bucket/eval_run_output" GENERAL_QUALITY_METRIC = types.EvaluationRunMetric( metric="general_quality_v1", - metric_config=types.UnifiedMetric( - predefined_metric_spec=types.PredefinedMetricSpec( + metric_config=genai_types.UnifiedMetric( + predefined_metric_spec=genai_types.PredefinedMetricSpec( metric_spec_name="general_quality_v1", ) ), ) FINAL_RESPONSE_QUALITY_METRIC = types.EvaluationRunMetric( metric="final_response_quality_v1", - metric_config=types.UnifiedMetric( - predefined_metric_spec=types.PredefinedMetricSpec( + metric_config=genai_types.UnifiedMetric( + predefined_metric_spec=genai_types.PredefinedMetricSpec( metric_spec_name="final_response_quality_v1", ) ), ) LLM_METRIC = types.EvaluationRunMetric( metric="llm_metric", - metric_config=types.UnifiedMetric( - llm_based_metric_spec=types.LLMBasedMetricSpec( + metric_config=genai_types.UnifiedMetric( + llm_based_metric_spec=genai_types.LLMBasedMetricSpec( metric_prompt_template=( "\nEvaluate the fluency of the response. Provide a score from 1-5." ) @@ -48,17 +48,17 @@ ) EXACT_MATCH_COMPUTATION_BASED_METRIC = types.EvaluationRunMetric( metric="exact_match", - metric_config=types.UnifiedMetric( - computation_based_metric_spec=types.ComputationBasedMetricSpec( - type=types.ComputationBasedMetricType.EXACT_MATCH, + metric_config=genai_types.UnifiedMetric( + computation_based_metric_spec=genai_types.ComputationBasedMetricSpec( + type=genai_types.ComputationBasedMetricType.EXACT_MATCH, ) ), ) BLEU_COMPUTATION_BASED_METRIC = types.EvaluationRunMetric( metric="exact_match_2", - metric_config=types.UnifiedMetric( - computation_based_metric_spec=types.ComputationBasedMetricSpec( - type=types.ComputationBasedMetricType.BLEU, + metric_config=genai_types.UnifiedMetric( + computation_based_metric_spec=genai_types.ComputationBasedMetricSpec( + type=genai_types.ComputationBasedMetricType.BLEU, parameters={"use_effective_order": True}, ) ), diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py index f33320324a..3dd2867859 100644 --- a/vertexai/_genai/_evals_common.py +++ b/vertexai/_genai/_evals_common.py @@ -1041,8 +1041,8 @@ def _resolve_evaluation_run_metrics( resolved_metrics_list.append( types.EvaluationRunMetric( metric=resolved_metric.name, - metric_config=types.UnifiedMetric( - predefined_metric_spec=types.PredefinedMetricSpec( + metric_config=genai_types.UnifiedMetric( + predefined_metric_spec=genai_types.PredefinedMetricSpec( metric_spec_name=resolved_metric.name, ) ), @@ -1072,8 +1072,8 @@ def _resolve_evaluation_run_metrics( resolved_metrics_list.append( types.EvaluationRunMetric( metric=resolved_metric.name, - metric_config=types.UnifiedMetric( - predefined_metric_spec=types.PredefinedMetricSpec( + metric_config=genai_types.UnifiedMetric( + predefined_metric_spec=genai_types.PredefinedMetricSpec( metric_spec_name=resolved_metric.name, ) ), diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py index 3632628b87..ee2e2c40fd 100644 --- a/vertexai/_genai/evals.py +++ b/vertexai/_genai/evals.py @@ -81,13 +81,7 @@ def _CreateEvaluationRunParameters_to_vertex( setv(to_object, ["dataSource"], getv(from_object, ["data_source"])) if getv(from_object, ["evaluation_config"]) is not None: - setv( - to_object, - ["evaluationConfig"], - _EvaluationRunConfig_to_vertex( - getv(from_object, ["evaluation_config"]), to_object - ), - ) + setv(to_object, ["evaluationConfig"], getv(from_object, ["evaluation_config"])) if getv(from_object, ["labels"]) is not None: setv(to_object, ["labels"], getv(from_object, ["labels"])) @@ -118,36 +112,6 @@ def _CreateEvaluationSetParameters_to_vertex( return to_object -def _CustomCodeExecutionSpec_from_vertex( - from_object: Union[dict[str, Any], object], - parent_object: Optional[dict[str, Any]] = None, -) -> dict[str, Any]: - to_object: dict[str, Any] = {} - if getv(from_object, ["evaluation_function"]) is not None: - setv( - to_object, - ["remote_custom_function"], - getv(from_object, ["evaluation_function"]), - ) - - return to_object - - -def _CustomCodeExecutionSpec_to_vertex( - from_object: Union[dict[str, Any], object], - parent_object: Optional[dict[str, Any]] = None, -) -> dict[str, Any]: - to_object: dict[str, Any] = {} - if getv(from_object, ["remote_custom_function"]) is not None: - setv( - to_object, - ["evaluation_function"], - getv(from_object, ["remote_custom_function"]), - ) - - return to_object - - def _EvaluateInstancesRequestParameters_to_vertex( from_object: Union[dict[str, Any], object], parent_object: Optional[dict[str, Any]] = None, @@ -232,90 +196,6 @@ def _EvaluateInstancesRequestParameters_to_vertex( return to_object -def _EvaluationRunConfig_from_vertex( - from_object: Union[dict[str, Any], object], - parent_object: Optional[dict[str, Any]] = None, -) -> dict[str, Any]: - to_object: dict[str, Any] = {} - if getv(from_object, ["metrics"]) is not None: - setv( - to_object, - ["metrics"], - [ - _EvaluationRunMetric_from_vertex(item, to_object) - for item in getv(from_object, ["metrics"]) - ], - ) - - if getv(from_object, ["outputConfig"]) is not None: - setv(to_object, ["output_config"], getv(from_object, ["outputConfig"])) - - if getv(from_object, ["autoraterConfig"]) is not None: - setv(to_object, ["autorater_config"], getv(from_object, ["autoraterConfig"])) - - return to_object - - -def _EvaluationRunConfig_to_vertex( - from_object: Union[dict[str, Any], object], - parent_object: Optional[dict[str, Any]] = None, -) -> dict[str, Any]: - to_object: dict[str, Any] = {} - if getv(from_object, ["metrics"]) is not None: - setv( - to_object, - ["metrics"], - [ - _EvaluationRunMetric_to_vertex(item, to_object) - for item in getv(from_object, ["metrics"]) - ], - ) - - if getv(from_object, ["output_config"]) is not None: - setv(to_object, ["outputConfig"], getv(from_object, ["output_config"])) - - if getv(from_object, ["autorater_config"]) is not None: - setv(to_object, ["autoraterConfig"], getv(from_object, ["autorater_config"])) - - return to_object - - -def _EvaluationRunMetric_from_vertex( - from_object: Union[dict[str, Any], object], - parent_object: Optional[dict[str, Any]] = None, -) -> dict[str, Any]: - to_object: dict[str, Any] = {} - if getv(from_object, ["metric"]) is not None: - setv(to_object, ["metric"], getv(from_object, ["metric"])) - - if getv(from_object, ["metricConfig"]) is not None: - setv( - to_object, - ["metric_config"], - _UnifiedMetric_from_vertex(getv(from_object, ["metricConfig"]), to_object), - ) - - return to_object - - -def _EvaluationRunMetric_to_vertex( - from_object: Union[dict[str, Any], object], - parent_object: Optional[dict[str, Any]] = None, -) -> dict[str, Any]: - to_object: dict[str, Any] = {} - if getv(from_object, ["metric"]) is not None: - setv(to_object, ["metric"], getv(from_object, ["metric"])) - - if getv(from_object, ["metric_config"]) is not None: - setv( - to_object, - ["metricConfig"], - _UnifiedMetric_to_vertex(getv(from_object, ["metric_config"]), to_object), - ) - - return to_object - - def _EvaluationRun_from_vertex( from_object: Union[dict[str, Any], object], parent_object: Optional[dict[str, Any]] = None, @@ -360,13 +240,7 @@ def _EvaluationRun_from_vertex( ) if getv(from_object, ["evaluationConfig"]) is not None: - setv( - to_object, - ["evaluation_config"], - _EvaluationRunConfig_from_vertex( - getv(from_object, ["evaluationConfig"]), to_object - ), - ) + setv(to_object, ["evaluation_config"], getv(from_object, ["evaluationConfig"])) if getv(from_object, ["inferenceConfigs"]) is not None: setv(to_object, ["inference_configs"], getv(from_object, ["inferenceConfigs"])) @@ -536,108 +410,6 @@ def _RubricGenerationSpec_to_vertex( return to_object -def _UnifiedMetric_from_vertex( - from_object: Union[dict[str, Any], object], - parent_object: Optional[dict[str, Any]] = None, -) -> dict[str, Any]: - to_object: dict[str, Any] = {} - if getv(from_object, ["bleuSpec"]) is not None: - setv(to_object, ["bleu_spec"], getv(from_object, ["bleuSpec"])) - - if getv(from_object, ["rougeSpec"]) is not None: - setv(to_object, ["rouge_spec"], getv(from_object, ["rougeSpec"])) - - if getv(from_object, ["pointwiseMetricSpec"]) is not None: - setv( - to_object, - ["pointwise_metric_spec"], - getv(from_object, ["pointwiseMetricSpec"]), - ) - - if getv(from_object, ["llmBasedMetricSpec"]) is not None: - setv( - to_object, - ["llm_based_metric_spec"], - getv(from_object, ["llmBasedMetricSpec"]), - ) - - if getv(from_object, ["customCodeExecutionSpec"]) is not None: - setv( - to_object, - ["custom_code_execution_spec"], - _CustomCodeExecutionSpec_from_vertex( - getv(from_object, ["customCodeExecutionSpec"]), to_object - ), - ) - - if getv(from_object, ["predefinedMetricSpec"]) is not None: - setv( - to_object, - ["predefined_metric_spec"], - getv(from_object, ["predefinedMetricSpec"]), - ) - - if getv(from_object, ["computationBasedMetricSpec"]) is not None: - setv( - to_object, - ["computation_based_metric_spec"], - getv(from_object, ["computationBasedMetricSpec"]), - ) - - return to_object - - -def _UnifiedMetric_to_vertex( - from_object: Union[dict[str, Any], object], - parent_object: Optional[dict[str, Any]] = None, -) -> dict[str, Any]: - to_object: dict[str, Any] = {} - if getv(from_object, ["bleu_spec"]) is not None: - setv(to_object, ["bleuSpec"], getv(from_object, ["bleu_spec"])) - - if getv(from_object, ["rouge_spec"]) is not None: - setv(to_object, ["rougeSpec"], getv(from_object, ["rouge_spec"])) - - if getv(from_object, ["pointwise_metric_spec"]) is not None: - setv( - to_object, - ["pointwiseMetricSpec"], - getv(from_object, ["pointwise_metric_spec"]), - ) - - if getv(from_object, ["llm_based_metric_spec"]) is not None: - setv( - to_object, - ["llmBasedMetricSpec"], - getv(from_object, ["llm_based_metric_spec"]), - ) - - if getv(from_object, ["custom_code_execution_spec"]) is not None: - setv( - to_object, - ["customCodeExecutionSpec"], - _CustomCodeExecutionSpec_to_vertex( - getv(from_object, ["custom_code_execution_spec"]), to_object - ), - ) - - if getv(from_object, ["predefined_metric_spec"]) is not None: - setv( - to_object, - ["predefinedMetricSpec"], - getv(from_object, ["predefined_metric_spec"]), - ) - - if getv(from_object, ["computation_based_metric_spec"]) is not None: - setv( - to_object, - ["computationBasedMetricSpec"], - getv(from_object, ["computation_based_metric_spec"]), - ) - - return to_object - - class Evals(_api_module.BaseModule): def _create_evaluation_item( @@ -908,7 +680,7 @@ def _generate_rubrics( *, contents: list[genai_types.ContentOrDict], predefined_rubric_generation_spec: Optional[ - types.PredefinedMetricSpecOrDict + genai_types.PredefinedMetricSpecOrDict ] = None, rubric_generation_spec: Optional[types.RubricGenerationSpecOrDict] = None, config: Optional[types.RubricGenerationConfigOrDict] = None, @@ -1480,7 +1252,7 @@ def generate_rubrics( "Could not determine metric_spec_name from predefined_spec_name" ) - predefined_spec = types.PredefinedMetricSpec( + predefined_spec = genai_types.PredefinedMetricSpec( metric_spec_name=actual_predefined_spec_name, metric_spec_parameters=metric_spec_parameters, ) @@ -2107,7 +1879,7 @@ async def _generate_rubrics( *, contents: list[genai_types.ContentOrDict], predefined_rubric_generation_spec: Optional[ - types.PredefinedMetricSpecOrDict + genai_types.PredefinedMetricSpecOrDict ] = None, rubric_generation_spec: Optional[types.RubricGenerationSpecOrDict] = None, config: Optional[types.RubricGenerationConfigOrDict] = None, diff --git a/vertexai/_genai/types/__init__.py b/vertexai/_genai/types/__init__.py index 19c3b021bb..1efc4dbc83 100644 --- a/vertexai/_genai/types/__init__.py +++ b/vertexai/_genai/types/__init__.py @@ -180,10 +180,6 @@ from .common import CometResult from .common import CometResultDict from .common import CometResultOrDict -from .common import ComputationBasedMetricSpec -from .common import ComputationBasedMetricSpecDict -from .common import ComputationBasedMetricSpecOrDict -from .common import ComputationBasedMetricType from .common import ContainerSpec from .common import ContainerSpecDict from .common import ContainerSpecOrDict @@ -226,9 +222,6 @@ from .common import CreatePromptVersionConfig from .common import CreatePromptVersionConfigDict from .common import CreatePromptVersionConfigOrDict -from .common import CustomCodeExecutionSpec -from .common import CustomCodeExecutionSpecDict -from .common import CustomCodeExecutionSpecOrDict from .common import CustomJob from .common import CustomJobDict from .common import CustomJobOrDict @@ -532,9 +525,6 @@ from .common import ListReasoningEnginesSessionsResponse from .common import ListReasoningEnginesSessionsResponseDict from .common import ListReasoningEnginesSessionsResponseOrDict -from .common import LLMBasedMetricSpec -from .common import LLMBasedMetricSpecDict -from .common import LLMBasedMetricSpecOrDict from .common import LLMMetric from .common import LustreMount from .common import LustreMountDict @@ -668,9 +658,6 @@ from .common import PointwiseMetricResult from .common import PointwiseMetricResultDict from .common import PointwiseMetricResultOrDict -from .common import PredefinedMetricSpec -from .common import PredefinedMetricSpecDict -from .common import PredefinedMetricSpecOrDict from .common import Prompt from .common import PromptData from .common import PromptDataDict @@ -1021,9 +1008,6 @@ from .common import TuningValidationAssessmentResultDict from .common import TuningValidationAssessmentResultOrDict from .common import Type -from .common import UnifiedMetric -from .common import UnifiedMetricDict -from .common import UnifiedMetricOrDict from .common import UpdateAgentEngineConfig from .common import UpdateAgentEngineConfigDict from .common import UpdateAgentEngineConfigOrDict @@ -1080,24 +1064,6 @@ "EvaluationRunDataSource", "EvaluationRunDataSourceDict", "EvaluationRunDataSourceOrDict", - "PredefinedMetricSpec", - "PredefinedMetricSpecDict", - "PredefinedMetricSpecOrDict", - "RubricGenerationSpec", - "RubricGenerationSpecDict", - "RubricGenerationSpecOrDict", - "LLMBasedMetricSpec", - "LLMBasedMetricSpecDict", - "LLMBasedMetricSpecOrDict", - "CustomCodeExecutionSpec", - "CustomCodeExecutionSpecDict", - "CustomCodeExecutionSpecOrDict", - "ComputationBasedMetricSpec", - "ComputationBasedMetricSpecDict", - "ComputationBasedMetricSpecOrDict", - "UnifiedMetric", - "UnifiedMetricDict", - "UnifiedMetricOrDict", "EvaluationRunMetric", "EvaluationRunMetricDict", "EvaluationRunMetricOrDict", @@ -1245,6 +1211,9 @@ "EvaluateInstancesConfig", "EvaluateInstancesConfigDict", "EvaluateInstancesConfigOrDict", + "RubricGenerationSpec", + "RubricGenerationSpecDict", + "RubricGenerationSpecOrDict", "RubricBasedMetricSpec", "RubricBasedMetricSpecDict", "RubricBasedMetricSpecOrDict", @@ -1958,9 +1927,8 @@ "State", "EvaluationItemType", "SamplingMethod", - "RubricContentType", - "ComputationBasedMetricType", "EvaluationRunState", + "RubricContentType", "OptimizeTarget", "MemoryMetadataMergeStrategy", "GenerateMemoriesResponseGeneratedMemoryAction", diff --git a/vertexai/_genai/types/common.py b/vertexai/_genai/types/common.py index b56d563fdf..a5fc89c718 100644 --- a/vertexai/_genai/types/common.py +++ b/vertexai/_genai/types/common.py @@ -313,32 +313,6 @@ class SamplingMethod(_common.CaseInSensitiveEnum): """Sampling method is random.""" -class RubricContentType(_common.CaseInSensitiveEnum): - """Specifies the type of rubric content to generate.""" - - PROPERTY = "PROPERTY" - """Generate rubrics based on properties.""" - NL_QUESTION_ANSWER = "NL_QUESTION_ANSWER" - """Generate rubrics in an NL question answer format.""" - PYTHON_CODE_ASSERTION = "PYTHON_CODE_ASSERTION" - """Generate rubrics in a unit test format.""" - - -class ComputationBasedMetricType(_common.CaseInSensitiveEnum): - """Represents the type of the computation based metric.""" - - COMPUTATION_BASED_METRIC_TYPE_UNSPECIFIED = ( - "COMPUTATION_BASED_METRIC_TYPE_UNSPECIFIED" - ) - """Computation based metric type is unspecified.""" - EXACT_MATCH = "EXACT_MATCH" - """Exact match metric.""" - BLEU = "BLEU" - """BLEU metric.""" - ROUGE = "ROUGE" - """ROUGE metric.""" - - class EvaluationRunState(_common.CaseInSensitiveEnum): """Represents the state of an evaluation run.""" @@ -360,6 +334,17 @@ class EvaluationRunState(_common.CaseInSensitiveEnum): """Evaluation run is performing rubric generation.""" +class RubricContentType(_common.CaseInSensitiveEnum): + """Specifies the type of rubric content to generate.""" + + PROPERTY = "PROPERTY" + """Generate rubrics based on properties.""" + NL_QUESTION_ANSWER = "NL_QUESTION_ANSWER" + """Generate rubrics in an NL question answer format.""" + PYTHON_CODE_ASSERTION = "PYTHON_CODE_ASSERTION" + """Generate rubrics in a unit test format.""" + + class OptimizeTarget(_common.CaseInSensitiveEnum): """Specifies the method for calling the optimize_prompt.""" @@ -831,256 +816,13 @@ class EvaluationRunDataSourceDict(TypedDict, total=False): ] -class PredefinedMetricSpec(_common.BaseModel): - """Spec for predefined metric.""" - - metric_spec_name: Optional[str] = Field( - default=None, - description="""The name of a pre-defined metric, such as "instruction_following_v1" or - "text_quality_v1".""", - ) - metric_spec_parameters: Optional[dict[str, Any]] = Field( - default=None, - description="""The parameters needed to run the pre-defined metric.""", - ) - - -class PredefinedMetricSpecDict(TypedDict, total=False): - """Spec for predefined metric.""" - - metric_spec_name: Optional[str] - """The name of a pre-defined metric, such as "instruction_following_v1" or - "text_quality_v1".""" - - metric_spec_parameters: Optional[dict[str, Any]] - """The parameters needed to run the pre-defined metric.""" - - -PredefinedMetricSpecOrDict = Union[PredefinedMetricSpec, PredefinedMetricSpecDict] - - -class RubricGenerationSpec(_common.BaseModel): - """Spec for generating rubrics.""" - - prompt_template: Optional[str] = Field( - default=None, - description="""Template for the prompt used to generate rubrics. - The details should be updated based on the most-recent recipe requirements.""", - ) - generator_model_config: Optional[genai_types.AutoraterConfig] = Field( - default=None, - description="""Configuration for the model used in rubric generation. - Configs including sampling count and base model can be specified here. - Flipping is not supported for rubric generation.""", - ) - rubric_content_type: Optional[RubricContentType] = Field( - default=None, description="""The type of rubric content to be generated.""" - ) - rubric_type_ontology: Optional[list[str]] = Field( - default=None, - description="""An optional, pre-defined list of allowed types for generated rubrics. - If this field is provided, it implies `include_rubric_type` should be true, - and the generated rubric types should be chosen from this ontology.""", - ) - - -class RubricGenerationSpecDict(TypedDict, total=False): - """Spec for generating rubrics.""" - - prompt_template: Optional[str] - """Template for the prompt used to generate rubrics. - The details should be updated based on the most-recent recipe requirements.""" - - generator_model_config: Optional[genai_types.AutoraterConfigDict] - """Configuration for the model used in rubric generation. - Configs including sampling count and base model can be specified here. - Flipping is not supported for rubric generation.""" - - rubric_content_type: Optional[RubricContentType] - """The type of rubric content to be generated.""" - - rubric_type_ontology: Optional[list[str]] - """An optional, pre-defined list of allowed types for generated rubrics. - If this field is provided, it implies `include_rubric_type` should be true, - and the generated rubric types should be chosen from this ontology.""" - - -RubricGenerationSpecOrDict = Union[RubricGenerationSpec, RubricGenerationSpecDict] - - -class LLMBasedMetricSpec(_common.BaseModel): - """Specification for an LLM based metric.""" - - metric_prompt_template: Optional[str] = Field( - default=None, description="""Template for the prompt sent to the judge model.""" - ) - system_instruction: Optional[str] = Field( - default=None, description="""System instruction for the judge model.""" - ) - judge_autorater_config: Optional[genai_types.AutoraterConfig] = Field( - default=None, - description="""Optional configuration for the judge LLM (Autorater).""", - ) - rubric_group_key: Optional[str] = Field( - default=None, - description="""Use a pre-defined group of rubrics associated with the input. - Refers to a key in the rubric_groups map of EvaluationInstance.""", - ) - predefined_rubric_generation_spec: Optional[PredefinedMetricSpec] = Field( - default=None, - description="""Dynamically generate rubrics using a predefined spec.""", - ) - rubric_generation_spec: Optional[RubricGenerationSpec] = Field( - default=None, - description="""Dynamically generate rubrics using this specification.""", - ) - - -class LLMBasedMetricSpecDict(TypedDict, total=False): - """Specification for an LLM based metric.""" - - metric_prompt_template: Optional[str] - """Template for the prompt sent to the judge model.""" - - system_instruction: Optional[str] - """System instruction for the judge model.""" - - judge_autorater_config: Optional[genai_types.AutoraterConfigDict] - """Optional configuration for the judge LLM (Autorater).""" - - rubric_group_key: Optional[str] - """Use a pre-defined group of rubrics associated with the input. - Refers to a key in the rubric_groups map of EvaluationInstance.""" - - predefined_rubric_generation_spec: Optional[PredefinedMetricSpecDict] - """Dynamically generate rubrics using a predefined spec.""" - - rubric_generation_spec: Optional[RubricGenerationSpecDict] - """Dynamically generate rubrics using this specification.""" - - -LLMBasedMetricSpecOrDict = Union[LLMBasedMetricSpec, LLMBasedMetricSpecDict] - - -class CustomCodeExecutionSpec(_common.BaseModel): - """Specificies a metric that is computed by running user-defined Python functions remotely.""" - - remote_custom_function: Optional[str] = Field( - default=None, - description="""A string representing a user-defined function for evaluation. - Expected user to define the following function, e.g.: - def evaluate(instance: dict[str, Any]) -> float: - Please include this function signature in the code snippet. - Instance is the evaluation instance, any fields populated in the instance - are available to the function as instance[field_name].""", - ) - - -class CustomCodeExecutionSpecDict(TypedDict, total=False): - """Specificies a metric that is computed by running user-defined Python functions remotely.""" - - remote_custom_function: Optional[str] - """A string representing a user-defined function for evaluation. - Expected user to define the following function, e.g.: - def evaluate(instance: dict[str, Any]) -> float: - Please include this function signature in the code snippet. - Instance is the evaluation instance, any fields populated in the instance - are available to the function as instance[field_name].""" - - -CustomCodeExecutionSpecOrDict = Union[ - CustomCodeExecutionSpec, CustomCodeExecutionSpecDict -] - - -class ComputationBasedMetricSpec(_common.BaseModel): - """Specification for a computation based metric.""" - - type: Optional[ComputationBasedMetricType] = Field( - default=None, description="""The type of the computation based metric.""" - ) - parameters: Optional[dict[str, Any]] = Field( - default=None, - description="""A map of parameters for the metric. ROUGE example: {"rouge_type": "rougeL", "split_summaries": True, "use_stemmer": True}. BLEU example: {"use_effective_order": True}.""", - ) - - -class ComputationBasedMetricSpecDict(TypedDict, total=False): - """Specification for a computation based metric.""" - - type: Optional[ComputationBasedMetricType] - """The type of the computation based metric.""" - - parameters: Optional[dict[str, Any]] - """A map of parameters for the metric. ROUGE example: {"rouge_type": "rougeL", "split_summaries": True, "use_stemmer": True}. BLEU example: {"use_effective_order": True}.""" - - -ComputationBasedMetricSpecOrDict = Union[ - ComputationBasedMetricSpec, ComputationBasedMetricSpecDict -] - - -class UnifiedMetric(_common.BaseModel): - """The unified metric used for evaluation.""" - - bleu_spec: Optional[genai_types.BleuSpec] = Field( - default=None, description="""The Bleu metric spec.""" - ) - rouge_spec: Optional[genai_types.RougeSpec] = Field( - default=None, description="""The rouge metric spec.""" - ) - pointwise_metric_spec: Optional[genai_types.PointwiseMetricSpec] = Field( - default=None, description="""The pointwise metric spec.""" - ) - llm_based_metric_spec: Optional[LLMBasedMetricSpec] = Field( - default=None, description="""The spec for an LLM based metric.""" - ) - custom_code_execution_spec: Optional[CustomCodeExecutionSpec] = Field( - default=None, description="""The spec for a custom code execution metric.""" - ) - predefined_metric_spec: Optional[PredefinedMetricSpec] = Field( - default=None, description="""The spec for a pre-defined metric.""" - ) - computation_based_metric_spec: Optional[ComputationBasedMetricSpec] = Field( - default=None, description="""The spec for a computation based metric.""" - ) - - -class UnifiedMetricDict(TypedDict, total=False): - """The unified metric used for evaluation.""" - - bleu_spec: Optional[genai_types.BleuSpecDict] - """The Bleu metric spec.""" - - rouge_spec: Optional[genai_types.RougeSpecDict] - """The rouge metric spec.""" - - pointwise_metric_spec: Optional[genai_types.PointwiseMetricSpecDict] - """The pointwise metric spec.""" - - llm_based_metric_spec: Optional[LLMBasedMetricSpecDict] - """The spec for an LLM based metric.""" - - custom_code_execution_spec: Optional[CustomCodeExecutionSpecDict] - """The spec for a custom code execution metric.""" - - predefined_metric_spec: Optional[PredefinedMetricSpecDict] - """The spec for a pre-defined metric.""" - - computation_based_metric_spec: Optional[ComputationBasedMetricSpecDict] - """The spec for a computation based metric.""" - - -UnifiedMetricOrDict = Union[UnifiedMetric, UnifiedMetricDict] - - class EvaluationRunMetric(_common.BaseModel): """The metric used for evaluation run.""" metric: Optional[str] = Field( default=None, description="""The name of the metric.""" ) - metric_config: Optional[UnifiedMetric] = Field( + metric_config: Optional[genai_types.UnifiedMetric] = Field( default=None, description="""The unified metric used for evaluation run.""" ) @@ -1091,7 +833,7 @@ class EvaluationRunMetricDict(TypedDict, total=False): metric: Optional[str] """The name of the metric.""" - metric_config: Optional[UnifiedMetricDict] + metric_config: Optional[genai_types.UnifiedMetricDict] """The unified metric used for evaluation run.""" @@ -2716,6 +2458,55 @@ class EvaluateInstancesConfigDict(TypedDict, total=False): ] +class RubricGenerationSpec(_common.BaseModel): + """Spec for generating rubrics.""" + + prompt_template: Optional[str] = Field( + default=None, + description="""Template for the prompt used to generate rubrics. + The details should be updated based on the most-recent recipe requirements.""", + ) + generator_model_config: Optional[genai_types.AutoraterConfig] = Field( + default=None, + description="""Configuration for the model used in rubric generation. + Configs including sampling count and base model can be specified here. + Flipping is not supported for rubric generation.""", + ) + rubric_content_type: Optional[RubricContentType] = Field( + default=None, description="""The type of rubric content to be generated.""" + ) + rubric_type_ontology: Optional[list[str]] = Field( + default=None, + description="""An optional, pre-defined list of allowed types for generated rubrics. + If this field is provided, it implies `include_rubric_type` should be true, + and the generated rubric types should be chosen from this ontology.""", + ) + + +class RubricGenerationSpecDict(TypedDict, total=False): + """Spec for generating rubrics.""" + + prompt_template: Optional[str] + """Template for the prompt used to generate rubrics. + The details should be updated based on the most-recent recipe requirements.""" + + generator_model_config: Optional[genai_types.AutoraterConfigDict] + """Configuration for the model used in rubric generation. + Configs including sampling count and base model can be specified here. + Flipping is not supported for rubric generation.""" + + rubric_content_type: Optional[RubricContentType] + """The type of rubric content to be generated.""" + + rubric_type_ontology: Optional[list[str]] + """An optional, pre-defined list of allowed types for generated rubrics. + If this field is provided, it implies `include_rubric_type` should be true, + and the generated rubric types should be chosen from this ontology.""" + + +RubricGenerationSpecOrDict = Union[RubricGenerationSpec, RubricGenerationSpecDict] + + class RubricBasedMetricSpec(_common.BaseModel): """Specification for a metric that is based on rubrics.""" @@ -3825,14 +3616,16 @@ class _GenerateInstanceRubricsRequest(_common.BaseModel): default=None, description="""The prompt to generate rubrics from. For single-turn queries, this is a single instance. For multi-turn queries, this is a repeated field that contains conversation history + latest request.""", ) - predefined_rubric_generation_spec: Optional[PredefinedMetricSpec] = Field( - default=None, - description="""Specification for using the rubric generation configs of a pre-defined + predefined_rubric_generation_spec: Optional[genai_types.PredefinedMetricSpec] = ( + Field( + default=None, + description="""Specification for using the rubric generation configs of a pre-defined metric, e.g. "generic_quality_v1" and "instruction_following_v1". Some of the configs may be only used in rubric generation and not supporting evaluation, e.g. "fully_customized_generic_quality_v1". If this field is set, the `rubric_generation_spec` field will be ignored. """, + ) ) rubric_generation_spec: Optional[RubricGenerationSpec] = Field( default=None, @@ -3847,7 +3640,7 @@ class _GenerateInstanceRubricsRequestDict(TypedDict, total=False): contents: Optional[list[genai_types.ContentDict]] """The prompt to generate rubrics from. For single-turn queries, this is a single instance. For multi-turn queries, this is a repeated field that contains conversation history + latest request.""" - predefined_rubric_generation_spec: Optional[PredefinedMetricSpecDict] + predefined_rubric_generation_spec: Optional[genai_types.PredefinedMetricSpecDict] """Specification for using the rubric generation configs of a pre-defined metric, e.g. "generic_quality_v1" and "instruction_following_v1". Some of the configs may be only used in rubric generation and not