From 887810f7281bef82ca5e2fd8394c4f3205e106c4 Mon Sep 17 00:00:00 2001 From: A Vertex SDK engineer Date: Wed, 1 Apr 2026 09:47:51 -0700 Subject: [PATCH] feat: GenAI Client(evals) - add core data models and code-gen mapping for auto-loss analysis PiperOrigin-RevId: 892978545 --- tests/unit/vertexai/genai/test_evals.py | 47 ++++ vertexai/_genai/_transformers.py | 116 ++++++++ vertexai/_genai/evals.py | 147 ++++++++++ vertexai/_genai/types/__init__.py | 56 ++++ vertexai/_genai/types/common.py | 347 ++++++++++++++++++++++++ 5 files changed, 713 insertions(+) diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py index 5ca67a448a..ffafbf1b6d 100644 --- a/tests/unit/vertexai/genai/test_evals.py +++ b/tests/unit/vertexai/genai/test_evals.py @@ -34,8 +34,10 @@ from vertexai._genai import _evals_metric_loaders from vertexai._genai import _gcs_utils from vertexai._genai import _observability_data_converter +from vertexai._genai import _transformers from vertexai._genai import evals from vertexai._genai import types as vertexai_genai_types +from vertexai._genai.types import common as common_types from google.genai import client from google.genai import errors as genai_errors from google.genai import types as genai_types @@ -218,6 +220,51 @@ def test_get_api_client_with_none_location( mock_vertexai_client.assert_not_called() +class TestTransformers: + """Unit tests for transformers.""" + + def test_t_inline_results(self): + eval_result = common_types.EvaluationResult( + eval_case_results=[ + common_types.EvalCaseResult( + eval_case_index=0, + response_candidate_results=[ + common_types.ResponseCandidateResult( + response_index=0, + metric_results={ + "tool_use_quality": common_types.EvalCaseMetricResult( + score=0.0, + explanation="Failed tool use", + ) + }, + ) + ], + ) + ], + evaluation_dataset=[ + common_types.EvaluationDataset( + eval_cases=[ + common_types.EvalCase( + prompt=genai_types.Content( + parts=[genai_types.Part(text="test prompt")] + ) + ) + ] + ) + ], + metadata=common_types.EvaluationRunMetadata(candidate_names=["gemini-pro"]), + ) + + payload = _transformers.t_inline_results([eval_result]) + + assert len(payload) == 1 + assert payload[0]["metric"] == "tool_use_quality" + assert payload[0]["request"]["prompt"]["text"] == "test prompt" + assert len(payload[0]["candidate_results"]) == 1 + assert payload[0]["candidate_results"][0]["candidate"] == "gemini-pro" + assert payload[0]["candidate_results"][0]["score"] == 0.0 + + class TestEvals: """Unit tests for the GenAI client.""" diff --git a/vertexai/_genai/_transformers.py b/vertexai/_genai/_transformers.py index 65ca401ae3..140f99134e 100644 --- a/vertexai/_genai/_transformers.py +++ b/vertexai/_genai/_transformers.py @@ -20,6 +20,7 @@ from google.genai._common import get_value_by_path as getv from . import _evals_constant +from . import _evals_data_converters from . import types _METRIC_RES_NAME_RE = r"^projects/[^/]+/locations/[^/]+/evaluationMetrics/[^/]+$" @@ -241,3 +242,118 @@ def t_metric_for_registry( raise ValueError(f"Unsupported metric type: {metric_name}") return metric_payload_item + + +def t_inline_results( + eval_results: list[Any], +) -> list[dict[str, Any]]: + """Transforms a list of SDK EvaluationResults into API EvaluationResults.""" + api_results: list[dict[str, Any]] = [] + + for eval_result in eval_results: + metadata = getv(eval_result, ["metadata"]) + candidate_names = getv(metadata, ["candidate_names"]) if metadata else [] + candidate_names = candidate_names or [] + + eval_dataset = getv(eval_result, ["evaluation_dataset"]) + eval_cases: list[Any] = [] + if isinstance(eval_dataset, list) and eval_dataset: + eval_cases = getv(eval_dataset[0], ["eval_cases"]) or [] + + eval_case_results = getv(eval_result, ["eval_case_results"]) or [] + + for case_result in eval_case_results: + case_idx = getv(case_result, ["eval_case_index"]) or 0 + + eval_case = None + if 0 <= case_idx < len(eval_cases): + eval_case = eval_cases[case_idx] + + prompt_payload = {} + if eval_case: + agent_data = getv(eval_case, ["agent_data"]) + prompt = getv(eval_case, ["prompt"]) + + if agent_data: + if hasattr(agent_data, "model_dump"): + prompt_payload["agent_data"] = agent_data.model_dump() + else: + prompt_payload["agent_data"] = agent_data + elif prompt: + text = _evals_data_converters._get_content_text( + prompt + ) # pylint: disable=protected-access + if text: + prompt_payload["text"] = str(text) + + cand_results = getv(case_result, ["response_candidate_results"]) or [] + for resp_cand_result in cand_results: + resp_idx = getv(resp_cand_result, ["response_index"]) or 0 + cand_name = f"candidate-{resp_idx}" + if 0 <= resp_idx < len(candidate_names): + cand_name = candidate_names[resp_idx] + + metric_results = getv(resp_cand_result, ["metric_results"]) or {} + + for metric_name, metric_res in metric_results.items(): + api_rubric_verdicts: list[dict[str, Any]] = [] + rubric_verdicts = getv(metric_res, ["rubric_verdicts"]) or [] + + for verdict in rubric_verdicts: + verdict_dict: dict[str, Any] = {} + eval_rubric = getv(verdict, ["evaluated_rubric"]) + + if eval_rubric: + rubric_content = getv(eval_rubric, ["content"]) + if rubric_content: + text = getv(rubric_content, ["text"]) + prop = getv(rubric_content, ["property"]) + + content_dict: dict[str, Any] = {} + if text: + content_dict["text"] = str(text) + if prop: + desc = getv(prop, ["description"]) + if desc: + content_dict["property"] = { + "description": str(desc) + } + verdict_dict["evaluated_rubric"] = { + "content": content_dict + } + + score = getv(verdict, ["score"]) + if score is not None: + verdict_dict["score"] = float(score) + + explanation = getv(verdict, ["explanation"]) + if explanation: + verdict_dict["explanation"] = str(explanation) + + if verdict_dict: + api_rubric_verdicts.append(verdict_dict) + + score = getv(metric_res, ["score"]) + explanation = getv(metric_res, ["explanation"]) + + candidate_result_payload: dict[str, Any] = { + "candidate": str(cand_name), + "metric": str(metric_name), + } + if score is not None: + candidate_result_payload["score"] = float(score) + if explanation: + candidate_result_payload["explanation"] = str(explanation) + if api_rubric_verdicts: + candidate_result_payload["rubric_verdicts"] = ( + api_rubric_verdicts + ) + + api_eval_result = { + "request": {"prompt": prompt_payload}, + "metric": str(metric_name), + "candidate_results": [candidate_result_payload], + } + api_results.append(api_eval_result) + + return api_results diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py index 5d5b128867..fa179618e7 100644 --- a/vertexai/_genai/evals.py +++ b/vertexai/_genai/evals.py @@ -595,6 +595,33 @@ def _GenerateInstanceRubricsRequest_to_vertex( return to_object +def _GenerateLossClustersParameters_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ["location"]) is not None: + setv(to_object, ["location"], getv(from_object, ["location"])) + + if getv(from_object, ["evaluation_set"]) is not None: + setv(to_object, ["evaluationSet"], getv(from_object, ["evaluation_set"])) + + if getv(from_object, ["inline_results"]) is not None: + setv( + to_object, + ["inlineResults", "evaluationResults"], + t.t_inline_results(getv(from_object, ["inline_results"])), + ) + + if getv(from_object, ["configs"]) is not None: + setv(to_object, ["configs"], [item for item in getv(from_object, ["configs"])]) + + if getv(from_object, ["config"]) is not None: + setv(to_object, ["config"], getv(from_object, ["config"])) + + return to_object + + def _GenerateUserScenariosParameters_to_vertex( from_object: Union[dict[str, Any], object], parent_object: Optional[dict[str, Any]] = None, @@ -1268,6 +1295,65 @@ def _generate_user_scenarios( self._api_client._verify_response(return_value) return return_value + def _generate_loss_clusters( + self, + *, + location: Optional[str] = None, + evaluation_set: Optional[str] = None, + inline_results: Optional[list[types.EvaluationResultOrDict]] = None, + configs: Optional[list[types.LossAnalysisConfigOrDict]] = None, + config: Optional[types.GenerateLossClustersConfigOrDict] = None, + ) -> types.GenerateLossClustersOperation: + """ + Generates loss clusters from evaluation results. + """ + + parameter_model = types._GenerateLossClustersParameters( + location=location, + evaluation_set=evaluation_set, + inline_results=inline_results, + configs=configs, + config=config, + ) + + request_url_dict: Optional[dict[str, str]] + if not self._api_client.vertexai: + raise ValueError("This method is only supported in the Vertex AI client.") + else: + request_dict = _GenerateLossClustersParameters_to_vertex(parameter_model) + request_url_dict = request_dict.get("_url") + if request_url_dict: + path = ":generateLossClusters".format_map(request_url_dict) + else: + path = ":generateLossClusters" + + query_params = request_dict.get("_query") + if query_params: + path = f"{path}?{urlencode(query_params)}" + # TODO: remove the hack that pops config. + request_dict.pop("config", None) + + http_options: Optional[types.HttpOptions] = None + if ( + parameter_model.config is not None + and parameter_model.config.http_options is not None + ): + http_options = parameter_model.config.http_options + + request_dict = _common.convert_to_dict(request_dict) + request_dict = _common.encode_unserializable_types(request_dict) + + response = self._api_client.request("post", path, request_dict, http_options) + + response_dict = {} if not response.body else json.loads(response.body) + + return_value = types.GenerateLossClustersOperation._from_response( + response=response_dict, kwargs=parameter_model.model_dump() + ) + + self._api_client._verify_response(return_value) + return return_value + def _generate_rubrics( self, *, @@ -2833,6 +2919,67 @@ async def _generate_user_scenarios( self._api_client._verify_response(return_value) return return_value + async def _generate_loss_clusters( + self, + *, + location: Optional[str] = None, + evaluation_set: Optional[str] = None, + inline_results: Optional[list[types.EvaluationResultOrDict]] = None, + configs: Optional[list[types.LossAnalysisConfigOrDict]] = None, + config: Optional[types.GenerateLossClustersConfigOrDict] = None, + ) -> types.GenerateLossClustersOperation: + """ + Generates loss clusters from evaluation results. + """ + + parameter_model = types._GenerateLossClustersParameters( + location=location, + evaluation_set=evaluation_set, + inline_results=inline_results, + configs=configs, + config=config, + ) + + request_url_dict: Optional[dict[str, str]] + if not self._api_client.vertexai: + raise ValueError("This method is only supported in the Vertex AI client.") + else: + request_dict = _GenerateLossClustersParameters_to_vertex(parameter_model) + request_url_dict = request_dict.get("_url") + if request_url_dict: + path = ":generateLossClusters".format_map(request_url_dict) + else: + path = ":generateLossClusters" + + query_params = request_dict.get("_query") + if query_params: + path = f"{path}?{urlencode(query_params)}" + # TODO: remove the hack that pops config. + request_dict.pop("config", None) + + http_options: Optional[types.HttpOptions] = None + if ( + parameter_model.config is not None + and parameter_model.config.http_options is not None + ): + http_options = parameter_model.config.http_options + + request_dict = _common.convert_to_dict(request_dict) + request_dict = _common.encode_unserializable_types(request_dict) + + response = await self._api_client.async_request( + "post", path, request_dict, http_options + ) + + response_dict = {} if not response.body else json.loads(response.body) + + return_value = types.GenerateLossClustersOperation._from_response( + response=response_dict, kwargs=parameter_model.model_dump() + ) + + self._api_client._verify_response(return_value) + return return_value + async def _generate_rubrics( self, *, diff --git a/vertexai/_genai/types/__init__.py b/vertexai/_genai/types/__init__.py index 937e8fe43f..97c0b9ca82 100644 --- a/vertexai/_genai/types/__init__.py +++ b/vertexai/_genai/types/__init__.py @@ -52,6 +52,7 @@ from .common import _ExecuteCodeAgentEngineSandboxRequestParameters from .common import _GenerateAgentEngineMemoriesRequestParameters from .common import _GenerateInstanceRubricsRequest +from .common import _GenerateLossClustersParameters from .common import _GenerateUserScenariosParameters from .common import _GetAgentEngineGenerateMemoriesOperationParameters from .common import _GetAgentEngineMemoryOperationParameters @@ -431,6 +432,9 @@ from .common import ExecuteSandboxEnvironmentResponse from .common import ExecuteSandboxEnvironmentResponseDict from .common import ExecuteSandboxEnvironmentResponseOrDict +from .common import FailedRubric +from .common import FailedRubricDict +from .common import FailedRubricOrDict from .common import Framework from .common import GeminiExample from .common import GeminiExampleDict @@ -447,6 +451,15 @@ from .common import GenerateInstanceRubricsResponse from .common import GenerateInstanceRubricsResponseDict from .common import GenerateInstanceRubricsResponseOrDict +from .common import GenerateLossClustersConfig +from .common import GenerateLossClustersConfigDict +from .common import GenerateLossClustersConfigOrDict +from .common import GenerateLossClustersOperation +from .common import GenerateLossClustersOperationDict +from .common import GenerateLossClustersOperationOrDict +from .common import GenerateLossClustersResponse +from .common import GenerateLossClustersResponseDict +from .common import GenerateLossClustersResponseOrDict from .common import GenerateMemoriesRequestDirectContentsSource from .common import GenerateMemoriesRequestDirectContentsSourceDict from .common import GenerateMemoriesRequestDirectContentsSourceEvent @@ -594,6 +607,21 @@ from .common import ListReasoningEnginesSessionsResponseDict from .common import ListReasoningEnginesSessionsResponseOrDict from .common import LLMMetric +from .common import LossAnalysisConfig +from .common import LossAnalysisConfigDict +from .common import LossAnalysisConfigOrDict +from .common import LossAnalysisResult +from .common import LossAnalysisResultDict +from .common import LossAnalysisResultOrDict +from .common import LossCluster +from .common import LossClusterDict +from .common import LossClusterOrDict +from .common import LossExample +from .common import LossExampleDict +from .common import LossExampleOrDict +from .common import LossTaxonomyEntry +from .common import LossTaxonomyEntryDict +from .common import LossTaxonomyEntryOrDict from .common import LustreMount from .common import LustreMountDict from .common import LustreMountOrDict @@ -1469,6 +1497,33 @@ "GenerateUserScenariosResponse", "GenerateUserScenariosResponseDict", "GenerateUserScenariosResponseOrDict", + "LossAnalysisConfig", + "LossAnalysisConfigDict", + "LossAnalysisConfigOrDict", + "GenerateLossClustersConfig", + "GenerateLossClustersConfigDict", + "GenerateLossClustersConfigOrDict", + "LossTaxonomyEntry", + "LossTaxonomyEntryDict", + "LossTaxonomyEntryOrDict", + "FailedRubric", + "FailedRubricDict", + "FailedRubricOrDict", + "LossExample", + "LossExampleDict", + "LossExampleOrDict", + "LossCluster", + "LossClusterDict", + "LossClusterOrDict", + "LossAnalysisResult", + "LossAnalysisResultDict", + "LossAnalysisResultOrDict", + "GenerateLossClustersResponse", + "GenerateLossClustersResponseDict", + "GenerateLossClustersResponseOrDict", + "GenerateLossClustersOperation", + "GenerateLossClustersOperationDict", + "GenerateLossClustersOperationOrDict", "RubricGenerationConfig", "RubricGenerationConfigDict", "RubricGenerationConfigOrDict", @@ -2183,6 +2238,7 @@ "_CreateEvaluationSetParameters", "_EvaluateInstancesRequestParameters", "_GenerateUserScenariosParameters", + "_GenerateLossClustersParameters", "_GenerateInstanceRubricsRequest", "_GetEvaluationMetricParameters", "_GetEvaluationRunParameters", diff --git a/vertexai/_genai/types/common.py b/vertexai/_genai/types/common.py index d549c5490c..2a8e2f107a 100644 --- a/vertexai/_genai/types/common.py +++ b/vertexai/_genai/types/common.py @@ -4666,6 +4666,353 @@ class GenerateUserScenariosResponseDict(TypedDict, total=False): ] +class LossAnalysisConfig(_common.BaseModel): + """Configuration for the loss analysis job.""" + + metric: Optional[str] = Field( + default=None, + description="""Required. The metric to analyze (e.g., "tool_use_quality"). This filters the EvaluationItems in the EvalSet to only those where EvaluationResult.metric matches this value.""", + ) + candidate: Optional[str] = Field( + default=None, + description="""Required. The candidate model/agent to analyze (e.g., "gemini-3.0-pro"). This targets the specific CandidateResult within the EvaluationResult.""", + ) + predefined_taxonomy: Optional[str] = Field( + default=None, + description="""Optional. The identifier for the pre-defined taxonomy to use (e.g., "agent_taxonomy_v1", "tool_use_v2"). If not specified, the service may select a default based on the metric.""", + ) + max_top_cluster_count: Optional[int] = Field( + default=None, + description="""Optional. Limits the analysis to the top N clusters. If not specified or set to 0, all clusters are returned.""", + ) + + +class LossAnalysisConfigDict(TypedDict, total=False): + """Configuration for the loss analysis job.""" + + metric: Optional[str] + """Required. The metric to analyze (e.g., "tool_use_quality"). This filters the EvaluationItems in the EvalSet to only those where EvaluationResult.metric matches this value.""" + + candidate: Optional[str] + """Required. The candidate model/agent to analyze (e.g., "gemini-3.0-pro"). This targets the specific CandidateResult within the EvaluationResult.""" + + predefined_taxonomy: Optional[str] + """Optional. The identifier for the pre-defined taxonomy to use (e.g., "agent_taxonomy_v1", "tool_use_v2"). If not specified, the service may select a default based on the metric.""" + + max_top_cluster_count: Optional[int] + """Optional. Limits the analysis to the top N clusters. If not specified or set to 0, all clusters are returned.""" + + +LossAnalysisConfigOrDict = Union[LossAnalysisConfig, LossAnalysisConfigDict] + + +class GenerateLossClustersConfig(_common.BaseModel): + """Config for generating loss clusters.""" + + http_options: Optional[genai_types.HttpOptions] = Field( + default=None, description="""Used to override HTTP request options.""" + ) + + +class GenerateLossClustersConfigDict(TypedDict, total=False): + """Config for generating loss clusters.""" + + http_options: Optional[genai_types.HttpOptionsDict] + """Used to override HTTP request options.""" + + +GenerateLossClustersConfigOrDict = Union[ + GenerateLossClustersConfig, GenerateLossClustersConfigDict +] + + +class _GenerateLossClustersParameters(_common.BaseModel): + """Parameters for GenerateLossClusters.""" + + location: Optional[str] = Field( + default=None, + description="""The resource name of the Location. Format: `projects/{project}/locations/{location}`.""", + ) + evaluation_set: Optional[str] = Field( + default=None, + description="""Reference to a persisted EvaluationSet. The service will read items from this set.""", + ) + inline_results: Optional[list[EvaluationResult]] = Field( + default=None, + description="""Inline evaluation results. Useful for ephemeral analysis in notebooks/SDKs where data isn't persisted.""", + ) + configs: Optional[list[LossAnalysisConfig]] = Field( + default=None, + description="""Configuration for the analysis algorithm. Analysis for multiple metrics and multiple candidates could be specified.""", + ) + config: Optional[GenerateLossClustersConfig] = Field( + default=None, description="""Config for generating loss clusters.""" + ) + + +class _GenerateLossClustersParametersDict(TypedDict, total=False): + """Parameters for GenerateLossClusters.""" + + location: Optional[str] + """The resource name of the Location. Format: `projects/{project}/locations/{location}`.""" + + evaluation_set: Optional[str] + """Reference to a persisted EvaluationSet. The service will read items from this set.""" + + inline_results: Optional[list[EvaluationResultDict]] + """Inline evaluation results. Useful for ephemeral analysis in notebooks/SDKs where data isn't persisted.""" + + configs: Optional[list[LossAnalysisConfigDict]] + """Configuration for the analysis algorithm. Analysis for multiple metrics and multiple candidates could be specified.""" + + config: Optional[GenerateLossClustersConfigDict] + """Config for generating loss clusters.""" + + +_GenerateLossClustersParametersOrDict = Union[ + _GenerateLossClustersParameters, _GenerateLossClustersParametersDict +] + + +class LossTaxonomyEntry(_common.BaseModel): + """A specific entry in the loss pattern taxonomy.""" + + l1_category: Optional[str] = Field( + default=None, + description="""The primary category of the loss (e.g., "Hallucination", "Tool Calling").""", + ) + l2_category: Optional[str] = Field( + default=None, + description="""The secondary category of the loss (e.g., "Hallucination of Action", "Incorrect Tool Selection").""", + ) + description: Optional[str] = Field( + default=None, + description="""A detailed description of this loss pattern. Example: "The agent verbally confirms an action without executing the tool." """, + ) + + +class LossTaxonomyEntryDict(TypedDict, total=False): + """A specific entry in the loss pattern taxonomy.""" + + l1_category: Optional[str] + """The primary category of the loss (e.g., "Hallucination", "Tool Calling").""" + + l2_category: Optional[str] + """The secondary category of the loss (e.g., "Hallucination of Action", "Incorrect Tool Selection").""" + + description: Optional[str] + """A detailed description of this loss pattern. Example: "The agent verbally confirms an action without executing the tool." """ + + +LossTaxonomyEntryOrDict = Union[LossTaxonomyEntry, LossTaxonomyEntryDict] + + +class FailedRubric(_common.BaseModel): + """A specific failed rubric and the associated analysis.""" + + rubric_id: Optional[str] = Field( + default=None, + description="""The unique ID of the rubric (if available from the metric source).""", + ) + classification_rationale: Optional[str] = Field( + default=None, + description="""The rationale provided by the Loss Analysis Classifier for why this failure maps to this specific Loss Cluster.""", + ) + + +class FailedRubricDict(TypedDict, total=False): + """A specific failed rubric and the associated analysis.""" + + rubric_id: Optional[str] + """The unique ID of the rubric (if available from the metric source).""" + + classification_rationale: Optional[str] + """The rationale provided by the Loss Analysis Classifier for why this failure maps to this specific Loss Cluster.""" + + +FailedRubricOrDict = Union[FailedRubric, FailedRubricDict] + + +class LossExample(_common.BaseModel): + """A specific example of a loss pattern.""" + + evaluation_item: Optional[str] = Field( + default=None, + description="""Reference to the persisted EvalItem resource name. Format: projects/.../locations/.../evaluationItems/{item_id}. Used when analysis is run on an EvalSet.""", + ) + evaluation_result: Optional[dict[str, Any]] = Field( + default=None, + description="""The full evaluation result object provided inline. Used when the analysis is performed on ephemeral data (without an EvaluationSet).""", + ) + failed_rubrics: Optional[list[FailedRubric]] = Field( + default=None, + description="""The specific rubric(s) that failed and caused this example to be classified here. An example might fail multiple rubrics, but only specific ones trigger this loss pattern.""", + ) + + +class LossExampleDict(TypedDict, total=False): + """A specific example of a loss pattern.""" + + evaluation_item: Optional[str] + """Reference to the persisted EvalItem resource name. Format: projects/.../locations/.../evaluationItems/{item_id}. Used when analysis is run on an EvalSet.""" + + evaluation_result: Optional[dict[str, Any]] + """The full evaluation result object provided inline. Used when the analysis is performed on ephemeral data (without an EvaluationSet).""" + + failed_rubrics: Optional[list[FailedRubricDict]] + """The specific rubric(s) that failed and caused this example to be classified here. An example might fail multiple rubrics, but only specific ones trigger this loss pattern.""" + + +LossExampleOrDict = Union[LossExample, LossExampleDict] + + +class LossCluster(_common.BaseModel): + """A semantic grouping of failures (e.g., "Hallucination of Action").""" + + cluster_id: Optional[str] = Field( + default=None, + description="""Unique identifier for the loss cluster within the scope of the analysis result.""", + ) + taxonomy_entry: Optional[LossTaxonomyEntry] = Field( + default=None, + description="""The structured definition of the loss taxonomy for this cluster.""", + ) + item_count: Optional[int] = Field( + default=None, + description="""The total number of EvaluationItems falling into this cluster.""", + ) + examples: Optional[list[LossExample]] = Field( + default=None, + description="""A list of examples that belong to this cluster. This links the cluster back to the specific EvaluationItems and Rubrics.""", + ) + + +class LossClusterDict(TypedDict, total=False): + """A semantic grouping of failures (e.g., "Hallucination of Action").""" + + cluster_id: Optional[str] + """Unique identifier for the loss cluster within the scope of the analysis result.""" + + taxonomy_entry: Optional[LossTaxonomyEntryDict] + """The structured definition of the loss taxonomy for this cluster.""" + + item_count: Optional[int] + """The total number of EvaluationItems falling into this cluster.""" + + examples: Optional[list[LossExampleDict]] + """A list of examples that belong to this cluster. This links the cluster back to the specific EvaluationItems and Rubrics.""" + + +LossClusterOrDict = Union[LossCluster, LossClusterDict] + + +class LossAnalysisResult(_common.BaseModel): + """The top-level result for loss analysis, stored within an EvalSet.""" + + config: Optional[LossAnalysisConfig] = Field( + default=None, + description="""The configuration used to generate this analysis.""", + ) + analysis_time: Optional[str] = Field( + default=None, description="""The timestamp when this analysis was performed.""" + ) + clusters: Optional[list[LossCluster]] = Field( + default=None, description="""The list of identified loss clusters.""" + ) + + +class LossAnalysisResultDict(TypedDict, total=False): + """The top-level result for loss analysis, stored within an EvalSet.""" + + config: Optional[LossAnalysisConfigDict] + """The configuration used to generate this analysis.""" + + analysis_time: Optional[str] + """The timestamp when this analysis was performed.""" + + clusters: Optional[list[LossClusterDict]] + """The list of identified loss clusters.""" + + +LossAnalysisResultOrDict = Union[LossAnalysisResult, LossAnalysisResultDict] + + +class GenerateLossClustersResponse(_common.BaseModel): + """Response message for EvaluationAnalyticsService.GenerateLossClusters.""" + + analysis_time: Optional[str] = Field( + default=None, description="""The timestamp when this analysis was completed.""" + ) + results: Optional[list[LossAnalysisResult]] = Field( + default=None, + description="""The analysis results, one per config provided in the request.""", + ) + + +class GenerateLossClustersResponseDict(TypedDict, total=False): + """Response message for EvaluationAnalyticsService.GenerateLossClusters.""" + + analysis_time: Optional[str] + """The timestamp when this analysis was completed.""" + + results: Optional[list[LossAnalysisResultDict]] + """The analysis results, one per config provided in the request.""" + + +GenerateLossClustersResponseOrDict = Union[ + GenerateLossClustersResponse, GenerateLossClustersResponseDict +] + + +class GenerateLossClustersOperation(_common.BaseModel): + """Long-running operation for generating loss clusters.""" + + name: Optional[str] = Field( + default=None, + description="""The server-assigned name, which is only unique within the same service that originally returns it. If you use the default HTTP mapping, the `name` should be a resource name ending with `operations/{unique_id}`.""", + ) + metadata: Optional[dict[str, Any]] = Field( + default=None, + description="""Service-specific metadata associated with the operation. It typically contains progress information and common metadata such as create time. Some services might not provide such metadata. Any method that returns a long-running operation should document the metadata type, if any.""", + ) + done: Optional[bool] = Field( + default=None, + description="""If the value is `false`, it means the operation is still in progress. If `true`, the operation is completed, and either `error` or `response` is available.""", + ) + error: Optional[dict[str, Any]] = Field( + default=None, + description="""The error result of the operation in case of failure or cancellation.""", + ) + response: Optional[GenerateLossClustersResponse] = Field( + default=None, + description="""Response message for EvaluationAnalyticsService.GenerateLossClusters.""", + ) + + +class GenerateLossClustersOperationDict(TypedDict, total=False): + """Long-running operation for generating loss clusters.""" + + name: Optional[str] + """The server-assigned name, which is only unique within the same service that originally returns it. If you use the default HTTP mapping, the `name` should be a resource name ending with `operations/{unique_id}`.""" + + metadata: Optional[dict[str, Any]] + """Service-specific metadata associated with the operation. It typically contains progress information and common metadata such as create time. Some services might not provide such metadata. Any method that returns a long-running operation should document the metadata type, if any.""" + + done: Optional[bool] + """If the value is `false`, it means the operation is still in progress. If `true`, the operation is completed, and either `error` or `response` is available.""" + + error: Optional[dict[str, Any]] + """The error result of the operation in case of failure or cancellation.""" + + response: Optional[GenerateLossClustersResponseDict] + """Response message for EvaluationAnalyticsService.GenerateLossClusters.""" + + +GenerateLossClustersOperationOrDict = Union[ + GenerateLossClustersOperation, GenerateLossClustersOperationDict +] + + class RubricGenerationConfig(_common.BaseModel): """Config for generating rubrics."""