From 4c5eb44fcfba3b3ab5d7fb3661b80e58bc64fcd1 Mon Sep 17 00:00:00 2001
From: langfuse-bot <langfuse-bot@langfuse.com>
Date: Thu, 28 May 2026 13:56:18 +0000
Subject: [PATCH] feat(api): update API spec from langfuse/langfuse 41f5847

---
 langfuse/api/__init__.py                      |   12 +
 .../api/blob_storage_integrations/__init__.py |    6 +
 .../api/blob_storage_integrations/client.py   |   46 +-
 .../blob_storage_integrations/raw_client.py   |   42 +
 .../types/__init__.py                         |    6 +
 .../types/blob_storage_export_field_group.py  |   62 +
 .../types/blob_storage_export_frequency.py    |    4 +
 .../types/blob_storage_export_source.py       |   35 +
 .../blob_storage_integration_response.py      |   13 +
 .../types/blob_storage_sync_status.py         |    4 +-
 ...create_blob_storage_integration_request.py |   25 +
 langfuse/api/client.py                        |   19 +
 langfuse/api/commons/types/observation_v2.py  |   47 +-
 langfuse/api/legacy/__init__.py               |    4 +-
 langfuse/api/legacy/score_v1/__init__.py      |    5 +-
 langfuse/api/legacy/score_v1/client.py        |   11 +
 langfuse/api/legacy/score_v1/raw_client.py    |   11 +
 .../api/legacy/score_v1/types/__init__.py     |    4 +-
 .../score_v1/types/create_score_request.py    |    6 +
 .../score_v1/types/create_score_source.py     |   28 +
 langfuse/api/llm_connections/__init__.py      |    3 +
 langfuse/api/llm_connections/client.py        |   81 +
 langfuse/api/llm_connections/raw_client.py    |  202 ++
 .../api/llm_connections/types/__init__.py     |    3 +
 .../types/delete_llm_connection_response.py   |   14 +
 langfuse/api/observations/client.py           |   38 +-
 langfuse/api/observations/raw_client.py       |   38 +-
 langfuse/api/score_configs/client.py          |    6 +-
 langfuse/api/score_configs/raw_client.py      |    6 +-
 .../types/create_score_config_request.py      |    6 +-
 .../types/update_score_config_request.py      |    2 +-
 langfuse/api/scores/client.py                 |    4 +-
 langfuse/api/scores/raw_client.py             |    4 +-
 langfuse/api/unstable/__init__.py             |  267 ++
 langfuse/api/unstable/client.py               |   91 +
 langfuse/api/unstable/commons/__init__.py     |  187 ++
 .../api/unstable/commons/types/__init__.py    |  211 ++
 .../array_options_evaluation_rule_filter.py   |   26 +
 .../types/boolean_evaluation_rule_filter.py   |   21 +
 ...category_options_evaluation_rule_filter.py |   26 +
 .../types/date_time_evaluation_rule_filter.py |   29 +
 ...tion_rule_array_options_filter_operator.py |   26 +
 ...evaluation_rule_boolean_filter_operator.py |   22 +
 .../commons/types/evaluation_rule_filter.py   |  740 ++++++
 .../commons/types/evaluation_rule_mapping.py  |   74 +
 .../types/evaluation_rule_mapping_source.py   |   51 +
 .../evaluation_rule_null_filter_operator.py   |   22 +
 .../evaluation_rule_number_filter_operator.py |   34 +
 ...evaluation_rule_options_filter_operator.py |   22 +
 .../commons/types/evaluation_rule_status.py   |   34 +
 .../evaluation_rule_string_filter_operator.py |   34 +
 .../commons/types/evaluation_rule_target.py   |   33 +
 .../commons/types/evaluator_model_config.py   |   46 +
 .../types/evaluator_output_data_type.py       |   35 +
 .../types/evaluator_output_definition.py      |  161 ++
 .../evaluator_output_field_definition.py      |   17 +
 .../unstable/commons/types/evaluator_scope.py |   29 +
 .../unstable/commons/types/evaluator_type.py  |   21 +
 .../types/null_evaluation_rule_filter.py      |   24 +
 .../types/number_evaluation_rule_filter.py    |   21 +
 .../number_object_evaluation_rule_filter.py   |   26 +
 ...lic_boolean_evaluator_output_definition.py |   26 +
 ...categorical_evaluator_output_definition.py |   29 +
 ...rical_evaluator_output_score_definition.py |   20 +
 .../public_evaluator_output_definition.py     |  167 ++
 ...lic_numeric_evaluator_output_definition.py |   26 +
 .../types/string_evaluation_rule_filter.py    |   21 +
 .../string_object_evaluation_rule_filter.py   |   26 +
 .../string_options_evaluation_rule_filter.py  |   24 +
 langfuse/api/unstable/errors/__init__.py      |   84 +
 .../api/unstable/errors/errors/__init__.py    |   68 +
 .../errors/errors/access_denied_error.py      |   15 +
 .../errors/errors/bad_request_error.py        |   15 +
 .../unstable/errors/errors/conflict_error.py  |   15 +
 .../errors/errors/internal_server_error.py    |   15 +
 .../errors/errors/method_not_allowed_error.py |   15 +
 .../unstable/errors/errors/not_found_error.py |   15 +
 .../errors/errors/too_many_requests_error.py  |   15 +
 .../errors/errors/unauthorized_error.py       |   15 +
 .../errors/unprocessable_content_error.py     |   15 +
 .../api/unstable/errors/types/__init__.py     |   53 +
 .../unstable/errors/types/public_api_error.py |   58 +
 .../errors/types/public_api_error_code.py     |   93 +
 .../errors/types/public_api_error_details.py  |  114 +
 .../types/public_api_validation_issue.py      |   34 +
 .../api/unstable/evaluation_rules/__init__.py |   64 +
 .../api/unstable/evaluation_rules/client.py   |  859 +++++++
 .../unstable/evaluation_rules/raw_client.py   | 2271 +++++++++++++++++
 .../evaluation_rules/types/__init__.py        |   62 +
 .../types/create_evaluation_rule_request.py   |   75 +
 .../types/delete_evaluation_rule_response.py  |   21 +
 .../evaluation_rules/types/evaluation_rule.py |  172 ++
 .../types/evaluation_rule_evaluator.py        |   35 +
 .../evaluation_rule_evaluator_reference.py    |   29 +
 .../types/evaluation_rules.py                 |   28 +
 .../types/update_evaluation_rule_request.py   |   74 +
 langfuse/api/unstable/evaluators/__init__.py  |   44 +
 langfuse/api/unstable/evaluators/client.py    |  458 ++++
 .../api/unstable/evaluators/raw_client.py     | 1278 ++++++++++
 .../api/unstable/evaluators/types/__init__.py |   46 +
 .../types/create_evaluator_request.py         |   50 +
 .../unstable/evaluators/types/evaluator.py    |  118 +
 .../unstable/evaluators/types/evaluators.py   |   17 +
 langfuse/api/unstable/raw_client.py           |   13 +
 104 files changed, 9784 insertions(+), 35 deletions(-)
 create mode 100644 langfuse/api/blob_storage_integrations/types/blob_storage_export_field_group.py
 create mode 100644 langfuse/api/blob_storage_integrations/types/blob_storage_export_source.py
 create mode 100644 langfuse/api/legacy/score_v1/types/create_score_source.py
 create mode 100644 langfuse/api/llm_connections/types/delete_llm_connection_response.py
 create mode 100644 langfuse/api/unstable/__init__.py
 create mode 100644 langfuse/api/unstable/client.py
 create mode 100644 langfuse/api/unstable/commons/__init__.py
 create mode 100644 langfuse/api/unstable/commons/types/__init__.py
 create mode 100644 langfuse/api/unstable/commons/types/array_options_evaluation_rule_filter.py
 create mode 100644 langfuse/api/unstable/commons/types/boolean_evaluation_rule_filter.py
 create mode 100644 langfuse/api/unstable/commons/types/category_options_evaluation_rule_filter.py
 create mode 100644 langfuse/api/unstable/commons/types/date_time_evaluation_rule_filter.py
 create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_array_options_filter_operator.py
 create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_boolean_filter_operator.py
 create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_filter.py
 create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_mapping.py
 create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_mapping_source.py
 create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_null_filter_operator.py
 create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_number_filter_operator.py
 create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_options_filter_operator.py
 create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_status.py
 create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_string_filter_operator.py
 create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_target.py
 create mode 100644 langfuse/api/unstable/commons/types/evaluator_model_config.py
 create mode 100644 langfuse/api/unstable/commons/types/evaluator_output_data_type.py
 create mode 100644 langfuse/api/unstable/commons/types/evaluator_output_definition.py
 create mode 100644 langfuse/api/unstable/commons/types/evaluator_output_field_definition.py
 create mode 100644 langfuse/api/unstable/commons/types/evaluator_scope.py
 create mode 100644 langfuse/api/unstable/commons/types/evaluator_type.py
 create mode 100644 langfuse/api/unstable/commons/types/null_evaluation_rule_filter.py
 create mode 100644 langfuse/api/unstable/commons/types/number_evaluation_rule_filter.py
 create mode 100644 langfuse/api/unstable/commons/types/number_object_evaluation_rule_filter.py
 create mode 100644 langfuse/api/unstable/commons/types/public_boolean_evaluator_output_definition.py
 create mode 100644 langfuse/api/unstable/commons/types/public_categorical_evaluator_output_definition.py
 create mode 100644 langfuse/api/unstable/commons/types/public_categorical_evaluator_output_score_definition.py
 create mode 100644 langfuse/api/unstable/commons/types/public_evaluator_output_definition.py
 create mode 100644 langfuse/api/unstable/commons/types/public_numeric_evaluator_output_definition.py
 create mode 100644 langfuse/api/unstable/commons/types/string_evaluation_rule_filter.py
 create mode 100644 langfuse/api/unstable/commons/types/string_object_evaluation_rule_filter.py
 create mode 100644 langfuse/api/unstable/commons/types/string_options_evaluation_rule_filter.py
 create mode 100644 langfuse/api/unstable/errors/__init__.py
 create mode 100644 langfuse/api/unstable/errors/errors/__init__.py
 create mode 100644 langfuse/api/unstable/errors/errors/access_denied_error.py
 create mode 100644 langfuse/api/unstable/errors/errors/bad_request_error.py
 create mode 100644 langfuse/api/unstable/errors/errors/conflict_error.py
 create mode 100644 langfuse/api/unstable/errors/errors/internal_server_error.py
 create mode 100644 langfuse/api/unstable/errors/errors/method_not_allowed_error.py
 create mode 100644 langfuse/api/unstable/errors/errors/not_found_error.py
 create mode 100644 langfuse/api/unstable/errors/errors/too_many_requests_error.py
 create mode 100644 langfuse/api/unstable/errors/errors/unauthorized_error.py
 create mode 100644 langfuse/api/unstable/errors/errors/unprocessable_content_error.py
 create mode 100644 langfuse/api/unstable/errors/types/__init__.py
 create mode 100644 langfuse/api/unstable/errors/types/public_api_error.py
 create mode 100644 langfuse/api/unstable/errors/types/public_api_error_code.py
 create mode 100644 langfuse/api/unstable/errors/types/public_api_error_details.py
 create mode 100644 langfuse/api/unstable/errors/types/public_api_validation_issue.py
 create mode 100644 langfuse/api/unstable/evaluation_rules/__init__.py
 create mode 100644 langfuse/api/unstable/evaluation_rules/client.py
 create mode 100644 langfuse/api/unstable/evaluation_rules/raw_client.py
 create mode 100644 langfuse/api/unstable/evaluation_rules/types/__init__.py
 create mode 100644 langfuse/api/unstable/evaluation_rules/types/create_evaluation_rule_request.py
 create mode 100644 langfuse/api/unstable/evaluation_rules/types/delete_evaluation_rule_response.py
 create mode 100644 langfuse/api/unstable/evaluation_rules/types/evaluation_rule.py
 create mode 100644 langfuse/api/unstable/evaluation_rules/types/evaluation_rule_evaluator.py
 create mode 100644 langfuse/api/unstable/evaluation_rules/types/evaluation_rule_evaluator_reference.py
 create mode 100644 langfuse/api/unstable/evaluation_rules/types/evaluation_rules.py
 create mode 100644 langfuse/api/unstable/evaluation_rules/types/update_evaluation_rule_request.py
 create mode 100644 langfuse/api/unstable/evaluators/__init__.py
 create mode 100644 langfuse/api/unstable/evaluators/client.py
 create mode 100644 langfuse/api/unstable/evaluators/raw_client.py
 create mode 100644 langfuse/api/unstable/evaluators/types/__init__.py
 create mode 100644 langfuse/api/unstable/evaluators/types/create_evaluator_request.py
 create mode 100644 langfuse/api/unstable/evaluators/types/evaluator.py
 create mode 100644 langfuse/api/unstable/evaluators/types/evaluators.py
 create mode 100644 langfuse/api/unstable/raw_client.py

diff --git a/langfuse/api/__init__.py b/langfuse/api/__init__.py
index aa103cf12..0e036263a 100644
--- a/langfuse/api/__init__.py
+++ b/langfuse/api/__init__.py
@@ -32,6 +32,7 @@
         scores,
         sessions,
         trace,
+        unstable,
         utils,
     )
     from .annotation_queues import (
@@ -50,8 +51,10 @@
         UpdateAnnotationQueueItemRequest,
     )
     from .blob_storage_integrations import (
+        BlobStorageExportFieldGroup,
         BlobStorageExportFrequency,
         BlobStorageExportMode,
+        BlobStorageExportSource,
         BlobStorageIntegrationDeletionResponse,
         BlobStorageIntegrationFileType,
         BlobStorageIntegrationResponse,
@@ -186,6 +189,7 @@
         UsageDetails,
     )
     from .llm_connections import (
+        DeleteLlmConnectionResponse,
         LlmAdapter,
         LlmConnection,
         PaginatedLlmConnections,
@@ -312,8 +316,10 @@
     "BasePrompt": ".prompts",
     "BaseScore": ".commons",
     "BaseScoreV1": ".commons",
+    "BlobStorageExportFieldGroup": ".blob_storage_integrations",
     "BlobStorageExportFrequency": ".blob_storage_integrations",
     "BlobStorageExportMode": ".blob_storage_integrations",
+    "BlobStorageExportSource": ".blob_storage_integrations",
     "BlobStorageIntegrationDeletionResponse": ".blob_storage_integrations",
     "BlobStorageIntegrationFileType": ".blob_storage_integrations",
     "BlobStorageIntegrationResponse": ".blob_storage_integrations",
@@ -368,6 +374,7 @@
     "DeleteAnnotationQueueItemResponse": ".annotation_queues",
     "DeleteDatasetItemResponse": ".dataset_items",
     "DeleteDatasetRunResponse": ".datasets",
+    "DeleteLlmConnectionResponse": ".llm_connections",
     "DeleteMembershipRequest": ".organizations",
     "DeleteTraceResponse": ".trace",
     "EmptyResponse": ".scim",
@@ -557,6 +564,7 @@
     "scores": ".scores",
     "sessions": ".sessions",
     "trace": ".trace",
+    "unstable": ".unstable",
     "utils": ".utils",
 }
 
@@ -605,8 +613,10 @@ def __dir__():
     "BasePrompt",
     "BaseScore",
     "BaseScoreV1",
+    "BlobStorageExportFieldGroup",
     "BlobStorageExportFrequency",
     "BlobStorageExportMode",
+    "BlobStorageExportSource",
     "BlobStorageIntegrationDeletionResponse",
     "BlobStorageIntegrationFileType",
     "BlobStorageIntegrationResponse",
@@ -661,6 +671,7 @@ def __dir__():
     "DeleteAnnotationQueueItemResponse",
     "DeleteDatasetItemResponse",
     "DeleteDatasetRunResponse",
+    "DeleteLlmConnectionResponse",
     "DeleteMembershipRequest",
     "DeleteTraceResponse",
     "EmptyResponse",
@@ -850,5 +861,6 @@ def __dir__():
     "scores",
     "sessions",
     "trace",
+    "unstable",
     "utils",
 ]
diff --git a/langfuse/api/blob_storage_integrations/__init__.py b/langfuse/api/blob_storage_integrations/__init__.py
index 266be2a6c..d92046ef2 100644
--- a/langfuse/api/blob_storage_integrations/__init__.py
+++ b/langfuse/api/blob_storage_integrations/__init__.py
@@ -7,8 +7,10 @@
 
 if typing.TYPE_CHECKING:
     from .types import (
+        BlobStorageExportFieldGroup,
         BlobStorageExportFrequency,
         BlobStorageExportMode,
+        BlobStorageExportSource,
         BlobStorageIntegrationDeletionResponse,
         BlobStorageIntegrationFileType,
         BlobStorageIntegrationResponse,
@@ -19,8 +21,10 @@
         CreateBlobStorageIntegrationRequest,
     )
 _dynamic_imports: typing.Dict[str, str] = {
+    "BlobStorageExportFieldGroup": ".types",
     "BlobStorageExportFrequency": ".types",
     "BlobStorageExportMode": ".types",
+    "BlobStorageExportSource": ".types",
     "BlobStorageIntegrationDeletionResponse": ".types",
     "BlobStorageIntegrationFileType": ".types",
     "BlobStorageIntegrationResponse": ".types",
@@ -60,8 +64,10 @@ def __dir__():
 
 
 __all__ = [
+    "BlobStorageExportFieldGroup",
     "BlobStorageExportFrequency",
     "BlobStorageExportMode",
+    "BlobStorageExportSource",
     "BlobStorageIntegrationDeletionResponse",
     "BlobStorageIntegrationFileType",
     "BlobStorageIntegrationResponse",
diff --git a/langfuse/api/blob_storage_integrations/client.py b/langfuse/api/blob_storage_integrations/client.py
index 21eeffde3..609e83fd3 100644
--- a/langfuse/api/blob_storage_integrations/client.py
+++ b/langfuse/api/blob_storage_integrations/client.py
@@ -9,8 +9,10 @@
     AsyncRawBlobStorageIntegrationsClient,
     RawBlobStorageIntegrationsClient,
 )
+from .types.blob_storage_export_field_group import BlobStorageExportFieldGroup
 from .types.blob_storage_export_frequency import BlobStorageExportFrequency
 from .types.blob_storage_export_mode import BlobStorageExportMode
+from .types.blob_storage_export_source import BlobStorageExportSource
 from .types.blob_storage_integration_deletion_response import (
     BlobStorageIntegrationDeletionResponse,
 )
@@ -95,6 +97,10 @@ def upsert_blob_storage_integration(
         prefix: typing.Optional[str] = OMIT,
         export_start_date: typing.Optional[dt.datetime] = OMIT,
         compressed: typing.Optional[bool] = OMIT,
+        export_source: typing.Optional[BlobStorageExportSource] = OMIT,
+        export_field_groups: typing.Optional[
+            typing.Sequence[BlobStorageExportFieldGroup]
+        ] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> BlobStorageIntegrationResponse:
         """
@@ -143,6 +149,20 @@ def upsert_blob_storage_integration(
         compressed : typing.Optional[bool]
             Enable gzip compression for exported files (.csv.gz, .json.gz, .jsonl.gz). Defaults to true.
 
+        export_source : typing.Optional[BlobStorageExportSource]
+            Data to export. When omitted on update, the existing value is preserved. When omitted on create: pre-cutoff Cloud projects and self-hosted deployments fall back to `LEGACY_TRACES_OBSERVATIONS`; post-cutoff Cloud projects (created on or after 2026-05-20) auto-default to `OBSERVATIONS_V2`. Required when `exportFieldGroups` is provided.
+
+            **Cloud-only deprecation gate (effective 2026-05-20):** For projects created on or after 2026-05-20 on Langfuse Cloud, `LEGACY_TRACES_OBSERVATIONS` and `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS` are rejected with HTTP 400. Omitting `exportSource` on these projects silently defaults to `OBSERVATIONS_V2` rather than the schema column default. Use `OBSERVATIONS_V2` for all new integrations. Projects created before 2026-05-20 and self-hosted deployments are unaffected.
+
+        export_field_groups : typing.Optional[typing.Sequence[BlobStorageExportFieldGroup]]
+            Field groups to include in each exported row.
+
+            For exportSource `OBSERVATIONS_V2` or `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS`: must include `core` if provided. When omitted on create, the column default (all groups) applies. When omitted on update, the existing value is preserved.
+
+            For exportSource `LEGACY_TRACES_OBSERVATIONS`: this field must be omitted or null. Sending an array (including an empty array) returns 400, because that source uses a fixed column set and does not honor field groups.
+
+            `exportFieldGroups` requires `exportSource` to be provided in the same request.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
@@ -173,7 +193,7 @@ def upsert_blob_storage_integration(
             type=BlobStorageIntegrationType.S3,
             bucket_name="bucketName",
             region="region",
-            export_frequency=BlobStorageExportFrequency.HOURLY,
+            export_frequency=BlobStorageExportFrequency.EVERY20MINUTES,
             enabled=True,
             force_path_style=True,
             file_type=BlobStorageIntegrationFileType.JSON,
@@ -196,6 +216,8 @@ def upsert_blob_storage_integration(
             prefix=prefix,
             export_start_date=export_start_date,
             compressed=compressed,
+            export_source=export_source,
+            export_field_groups=export_field_groups,
             request_options=request_options,
         )
         return _response.data
@@ -354,6 +376,10 @@ async def upsert_blob_storage_integration(
         prefix: typing.Optional[str] = OMIT,
         export_start_date: typing.Optional[dt.datetime] = OMIT,
         compressed: typing.Optional[bool] = OMIT,
+        export_source: typing.Optional[BlobStorageExportSource] = OMIT,
+        export_field_groups: typing.Optional[
+            typing.Sequence[BlobStorageExportFieldGroup]
+        ] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> BlobStorageIntegrationResponse:
         """
@@ -402,6 +428,20 @@ async def upsert_blob_storage_integration(
         compressed : typing.Optional[bool]
             Enable gzip compression for exported files (.csv.gz, .json.gz, .jsonl.gz). Defaults to true.
 
+        export_source : typing.Optional[BlobStorageExportSource]
+            Data to export. When omitted on update, the existing value is preserved. When omitted on create: pre-cutoff Cloud projects and self-hosted deployments fall back to `LEGACY_TRACES_OBSERVATIONS`; post-cutoff Cloud projects (created on or after 2026-05-20) auto-default to `OBSERVATIONS_V2`. Required when `exportFieldGroups` is provided.
+
+            **Cloud-only deprecation gate (effective 2026-05-20):** For projects created on or after 2026-05-20 on Langfuse Cloud, `LEGACY_TRACES_OBSERVATIONS` and `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS` are rejected with HTTP 400. Omitting `exportSource` on these projects silently defaults to `OBSERVATIONS_V2` rather than the schema column default. Use `OBSERVATIONS_V2` for all new integrations. Projects created before 2026-05-20 and self-hosted deployments are unaffected.
+
+        export_field_groups : typing.Optional[typing.Sequence[BlobStorageExportFieldGroup]]
+            Field groups to include in each exported row.
+
+            For exportSource `OBSERVATIONS_V2` or `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS`: must include `core` if provided. When omitted on create, the column default (all groups) applies. When omitted on update, the existing value is preserved.
+
+            For exportSource `LEGACY_TRACES_OBSERVATIONS`: this field must be omitted or null. Sending an array (including an empty array) returns 400, because that source uses a fixed column set and does not honor field groups.
+
+            `exportFieldGroups` requires `exportSource` to be provided in the same request.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
@@ -437,7 +477,7 @@ async def main() -> None:
                 type=BlobStorageIntegrationType.S3,
                 bucket_name="bucketName",
                 region="region",
-                export_frequency=BlobStorageExportFrequency.HOURLY,
+                export_frequency=BlobStorageExportFrequency.EVERY20MINUTES,
                 enabled=True,
                 force_path_style=True,
                 file_type=BlobStorageIntegrationFileType.JSON,
@@ -463,6 +503,8 @@ async def main() -> None:
             prefix=prefix,
             export_start_date=export_start_date,
             compressed=compressed,
+            export_source=export_source,
+            export_field_groups=export_field_groups,
             request_options=request_options,
         )
         return _response.data
diff --git a/langfuse/api/blob_storage_integrations/raw_client.py b/langfuse/api/blob_storage_integrations/raw_client.py
index 5833ea63e..09e036db6 100644
--- a/langfuse/api/blob_storage_integrations/raw_client.py
+++ b/langfuse/api/blob_storage_integrations/raw_client.py
@@ -15,8 +15,10 @@
 from ..core.jsonable_encoder import jsonable_encoder
 from ..core.pydantic_utilities import parse_obj_as
 from ..core.request_options import RequestOptions
+from .types.blob_storage_export_field_group import BlobStorageExportFieldGroup
 from .types.blob_storage_export_frequency import BlobStorageExportFrequency
 from .types.blob_storage_export_mode import BlobStorageExportMode
+from .types.blob_storage_export_source import BlobStorageExportSource
 from .types.blob_storage_integration_deletion_response import (
     BlobStorageIntegrationDeletionResponse,
 )
@@ -152,6 +154,10 @@ def upsert_blob_storage_integration(
         prefix: typing.Optional[str] = OMIT,
         export_start_date: typing.Optional[dt.datetime] = OMIT,
         compressed: typing.Optional[bool] = OMIT,
+        export_source: typing.Optional[BlobStorageExportSource] = OMIT,
+        export_field_groups: typing.Optional[
+            typing.Sequence[BlobStorageExportFieldGroup]
+        ] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> HttpResponse[BlobStorageIntegrationResponse]:
         """
@@ -200,6 +206,20 @@ def upsert_blob_storage_integration(
         compressed : typing.Optional[bool]
             Enable gzip compression for exported files (.csv.gz, .json.gz, .jsonl.gz). Defaults to true.
 
+        export_source : typing.Optional[BlobStorageExportSource]
+            Data to export. When omitted on update, the existing value is preserved. When omitted on create: pre-cutoff Cloud projects and self-hosted deployments fall back to `LEGACY_TRACES_OBSERVATIONS`; post-cutoff Cloud projects (created on or after 2026-05-20) auto-default to `OBSERVATIONS_V2`. Required when `exportFieldGroups` is provided.
+
+            **Cloud-only deprecation gate (effective 2026-05-20):** For projects created on or after 2026-05-20 on Langfuse Cloud, `LEGACY_TRACES_OBSERVATIONS` and `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS` are rejected with HTTP 400. Omitting `exportSource` on these projects silently defaults to `OBSERVATIONS_V2` rather than the schema column default. Use `OBSERVATIONS_V2` for all new integrations. Projects created before 2026-05-20 and self-hosted deployments are unaffected.
+
+        export_field_groups : typing.Optional[typing.Sequence[BlobStorageExportFieldGroup]]
+            Field groups to include in each exported row.
+
+            For exportSource `OBSERVATIONS_V2` or `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS`: must include `core` if provided. When omitted on create, the column default (all groups) applies. When omitted on update, the existing value is preserved.
+
+            For exportSource `LEGACY_TRACES_OBSERVATIONS`: this field must be omitted or null. Sending an array (including an empty array) returns 400, because that source uses a fixed column set and does not honor field groups.
+
+            `exportFieldGroups` requires `exportSource` to be provided in the same request.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
@@ -226,6 +246,8 @@ def upsert_blob_storage_integration(
                 "exportMode": export_mode,
                 "exportStartDate": export_start_date,
                 "compressed": compressed,
+                "exportSource": export_source,
+                "exportFieldGroups": export_field_groups,
             },
             request_options=request_options,
             omit=OMIT,
@@ -629,6 +651,10 @@ async def upsert_blob_storage_integration(
         prefix: typing.Optional[str] = OMIT,
         export_start_date: typing.Optional[dt.datetime] = OMIT,
         compressed: typing.Optional[bool] = OMIT,
+        export_source: typing.Optional[BlobStorageExportSource] = OMIT,
+        export_field_groups: typing.Optional[
+            typing.Sequence[BlobStorageExportFieldGroup]
+        ] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> AsyncHttpResponse[BlobStorageIntegrationResponse]:
         """
@@ -677,6 +703,20 @@ async def upsert_blob_storage_integration(
         compressed : typing.Optional[bool]
             Enable gzip compression for exported files (.csv.gz, .json.gz, .jsonl.gz). Defaults to true.
 
+        export_source : typing.Optional[BlobStorageExportSource]
+            Data to export. When omitted on update, the existing value is preserved. When omitted on create: pre-cutoff Cloud projects and self-hosted deployments fall back to `LEGACY_TRACES_OBSERVATIONS`; post-cutoff Cloud projects (created on or after 2026-05-20) auto-default to `OBSERVATIONS_V2`. Required when `exportFieldGroups` is provided.
+
+            **Cloud-only deprecation gate (effective 2026-05-20):** For projects created on or after 2026-05-20 on Langfuse Cloud, `LEGACY_TRACES_OBSERVATIONS` and `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS` are rejected with HTTP 400. Omitting `exportSource` on these projects silently defaults to `OBSERVATIONS_V2` rather than the schema column default. Use `OBSERVATIONS_V2` for all new integrations. Projects created before 2026-05-20 and self-hosted deployments are unaffected.
+
+        export_field_groups : typing.Optional[typing.Sequence[BlobStorageExportFieldGroup]]
+            Field groups to include in each exported row.
+
+            For exportSource `OBSERVATIONS_V2` or `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS`: must include `core` if provided. When omitted on create, the column default (all groups) applies. When omitted on update, the existing value is preserved.
+
+            For exportSource `LEGACY_TRACES_OBSERVATIONS`: this field must be omitted or null. Sending an array (including an empty array) returns 400, because that source uses a fixed column set and does not honor field groups.
+
+            `exportFieldGroups` requires `exportSource` to be provided in the same request.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
@@ -703,6 +743,8 @@ async def upsert_blob_storage_integration(
                 "exportMode": export_mode,
                 "exportStartDate": export_start_date,
                 "compressed": compressed,
+                "exportSource": export_source,
+                "exportFieldGroups": export_field_groups,
             },
             request_options=request_options,
             omit=OMIT,
diff --git a/langfuse/api/blob_storage_integrations/types/__init__.py b/langfuse/api/blob_storage_integrations/types/__init__.py
index e0fe3e9ff..3a2a0e1ec 100644
--- a/langfuse/api/blob_storage_integrations/types/__init__.py
+++ b/langfuse/api/blob_storage_integrations/types/__init__.py
@@ -6,8 +6,10 @@
 from importlib import import_module
 
 if typing.TYPE_CHECKING:
+    from .blob_storage_export_field_group import BlobStorageExportFieldGroup
     from .blob_storage_export_frequency import BlobStorageExportFrequency
     from .blob_storage_export_mode import BlobStorageExportMode
+    from .blob_storage_export_source import BlobStorageExportSource
     from .blob_storage_integration_deletion_response import (
         BlobStorageIntegrationDeletionResponse,
     )
@@ -23,8 +25,10 @@
         CreateBlobStorageIntegrationRequest,
     )
 _dynamic_imports: typing.Dict[str, str] = {
+    "BlobStorageExportFieldGroup": ".blob_storage_export_field_group",
     "BlobStorageExportFrequency": ".blob_storage_export_frequency",
     "BlobStorageExportMode": ".blob_storage_export_mode",
+    "BlobStorageExportSource": ".blob_storage_export_source",
     "BlobStorageIntegrationDeletionResponse": ".blob_storage_integration_deletion_response",
     "BlobStorageIntegrationFileType": ".blob_storage_integration_file_type",
     "BlobStorageIntegrationResponse": ".blob_storage_integration_response",
@@ -64,8 +68,10 @@ def __dir__():
 
 
 __all__ = [
+    "BlobStorageExportFieldGroup",
     "BlobStorageExportFrequency",
     "BlobStorageExportMode",
+    "BlobStorageExportSource",
     "BlobStorageIntegrationDeletionResponse",
     "BlobStorageIntegrationFileType",
     "BlobStorageIntegrationResponse",
diff --git a/langfuse/api/blob_storage_integrations/types/blob_storage_export_field_group.py b/langfuse/api/blob_storage_integrations/types/blob_storage_export_field_group.py
new file mode 100644
index 000000000..c21a9c3bb
--- /dev/null
+++ b/langfuse/api/blob_storage_integrations/types/blob_storage_export_field_group.py
@@ -0,0 +1,62 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ...core import enum
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class BlobStorageExportFieldGroup(enum.StrEnum):
+    """
+    Named group of fields that can be included in OBSERVATIONS_V2 and LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS exports.
+    """
+
+    CORE = "core"
+    BASIC = "basic"
+    TIME = "time"
+    IO = "io"
+    METADATA = "metadata"
+    MODEL = "model"
+    USAGE = "usage"
+    PROMPT = "prompt"
+    METRICS = "metrics"
+    TOOLS = "tools"
+    TRACE_CONTEXT = "trace_context"
+
+    def visit(
+        self,
+        core: typing.Callable[[], T_Result],
+        basic: typing.Callable[[], T_Result],
+        time: typing.Callable[[], T_Result],
+        io: typing.Callable[[], T_Result],
+        metadata: typing.Callable[[], T_Result],
+        model: typing.Callable[[], T_Result],
+        usage: typing.Callable[[], T_Result],
+        prompt: typing.Callable[[], T_Result],
+        metrics: typing.Callable[[], T_Result],
+        tools: typing.Callable[[], T_Result],
+        trace_context: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        """
+        Call the callback that corresponds to this member and return its result.
+
+        Every member has one required zero-argument callback, passed under the
+        member's lower-cased name, so callers must cover all field groups.
+        Only the matching callback is invoked.
+        """
+        # Keyed by member value; .get() yields None (no call) if nothing matches.
+        handler = {
+            "core": core,
+            "basic": basic,
+            "time": time,
+            "io": io,
+            "metadata": metadata,
+            "model": model,
+            "usage": usage,
+            "prompt": prompt,
+            "metrics": metrics,
+            "tools": tools,
+            "trace_context": trace_context,
+        }.get(self.value)
+        return handler() if handler is not None else None
diff --git a/langfuse/api/blob_storage_integrations/types/blob_storage_export_frequency.py b/langfuse/api/blob_storage_integrations/types/blob_storage_export_frequency.py
index bcc7fc6d5..4799ecefb 100644
--- a/langfuse/api/blob_storage_integrations/types/blob_storage_export_frequency.py
+++ b/langfuse/api/blob_storage_integrations/types/blob_storage_export_frequency.py
@@ -8,16 +8,20 @@
 
 
 class BlobStorageExportFrequency(enum.StrEnum):
+    EVERY20MINUTES = "every_20_minutes"
     HOURLY = "hourly"
     DAILY = "daily"
     WEEKLY = "weekly"
 
     def visit(
         self,
+        every20minutes: typing.Callable[[], T_Result],
         hourly: typing.Callable[[], T_Result],
         daily: typing.Callable[[], T_Result],
         weekly: typing.Callable[[], T_Result],
     ) -> T_Result:
+        if self is BlobStorageExportFrequency.EVERY20MINUTES:
+            return every20minutes()
         if self is BlobStorageExportFrequency.HOURLY:
             return hourly()
         if self is BlobStorageExportFrequency.DAILY:
diff --git a/langfuse/api/blob_storage_integrations/types/blob_storage_export_source.py b/langfuse/api/blob_storage_integrations/types/blob_storage_export_source.py
new file mode 100644
index 000000000..1451473b4
--- /dev/null
+++ b/langfuse/api/blob_storage_integrations/types/blob_storage_export_source.py
@@ -0,0 +1,35 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ...core import enum
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class BlobStorageExportSource(enum.StrEnum):
+    """
+    Selects which data set a blob storage integration exports.
+    - `LEGACY_TRACES_OBSERVATIONS`: the traces, observations, and scores tables with a fixed column set; `exportFieldGroups` does not apply.
+    - `OBSERVATIONS_V2`: the data model of the `/api/public/v2/observations` endpoint, plus scores; `exportFieldGroups` controls the columns.
+    - `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS`: both of the above; `exportFieldGroups` controls the columns of the `OBSERVATIONS_V2` portion.
+
+    **Note:** `OBSERVATIONS_V2` and the enriched-observations portion of `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS` rely on the enriched observations table (Langfuse Fast Preview / v4), which is currently available on Langfuse Cloud only. See https://langfuse.com/docs/v4.
+    """
+
+    LEGACY_TRACES_OBSERVATIONS = "LEGACY_TRACES_OBSERVATIONS"
+    OBSERVATIONS_V2 = "OBSERVATIONS_V2"
+    LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS = "LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS"
+
+    def visit(
+        self,
+        legacy_traces_observations: typing.Callable[[], T_Result],
+        observations_v2: typing.Callable[[], T_Result],
+        legacy_traces_and_enriched_observations: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        handler = {
+            "LEGACY_TRACES_OBSERVATIONS": legacy_traces_observations,
+            "OBSERVATIONS_V2": observations_v2,
+            "LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS": legacy_traces_and_enriched_observations,
+        }.get(self.value)  # keyed by member value; None if nothing matches
+        return handler() if handler is not None else None
diff --git a/langfuse/api/blob_storage_integrations/types/blob_storage_integration_response.py b/langfuse/api/blob_storage_integrations/types/blob_storage_integration_response.py
index b3630297b..e2b5921a0 100644
--- a/langfuse/api/blob_storage_integrations/types/blob_storage_integration_response.py
+++ b/langfuse/api/blob_storage_integrations/types/blob_storage_integration_response.py
@@ -7,8 +7,10 @@
 import typing_extensions
 from ...core.pydantic_utilities import UniversalBaseModel
 from ...core.serialization import FieldMetadata
+from .blob_storage_export_field_group import BlobStorageExportFieldGroup
 from .blob_storage_export_frequency import BlobStorageExportFrequency
 from .blob_storage_export_mode import BlobStorageExportMode
+from .blob_storage_export_source import BlobStorageExportSource
 from .blob_storage_integration_file_type import BlobStorageIntegrationFileType
 from .blob_storage_integration_type import BlobStorageIntegrationType
 
@@ -41,6 +43,17 @@ class BlobStorageIntegrationResponse(UniversalBaseModel):
         typing.Optional[dt.datetime], FieldMetadata(alias="exportStartDate")
     ] = None
     compressed: bool
+    export_source: typing_extensions.Annotated[
+        BlobStorageExportSource, FieldMetadata(alias="exportSource")
+    ]
+    export_field_groups: typing_extensions.Annotated[
+        typing.Optional[typing.List[BlobStorageExportFieldGroup]],
+        FieldMetadata(alias="exportFieldGroups"),
+    ] = pydantic.Field(default=None)
+    """
+    Field groups included in each exported row for `OBSERVATIONS_V2` / `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS` sources. Always `null` when exportSource is `LEGACY_TRACES_OBSERVATIONS` (the field does not apply to that source; any legacy DB value is hidden from the public surface).
+    """
+
     next_sync_at: typing_extensions.Annotated[
         typing.Optional[dt.datetime], FieldMetadata(alias="nextSyncAt")
     ] = None
diff --git a/langfuse/api/blob_storage_integrations/types/blob_storage_sync_status.py b/langfuse/api/blob_storage_integrations/types/blob_storage_sync_status.py
index 254e06645..559e41450 100644
--- a/langfuse/api/blob_storage_integrations/types/blob_storage_sync_status.py
+++ b/langfuse/api/blob_storage_integrations/types/blob_storage_sync_status.py
@@ -17,8 +17,8 @@ class BlobStorageSyncStatus(enum.StrEnum):
     - `up_to_date` — all available data has been exported; next export is scheduled for the future
 
     **ETL usage**: poll this endpoint and check for `up_to_date` status. Compare `lastSyncAt` against your
-    ETL bookmark to determine if new data is available. Note that exports run with a 30-minute lag buffer,
-    so `lastSyncAt` will always be at least 30 minutes behind real-time.
+    ETL bookmark to determine if new data is available. Note that exports run with a 20-minute lag buffer,
+    so `lastSyncAt` will always be at least 20 minutes behind real-time.
     """
 
     IDLE = "idle"
diff --git a/langfuse/api/blob_storage_integrations/types/create_blob_storage_integration_request.py b/langfuse/api/blob_storage_integrations/types/create_blob_storage_integration_request.py
index ada6e432b..89c9bca4a 100644
--- a/langfuse/api/blob_storage_integrations/types/create_blob_storage_integration_request.py
+++ b/langfuse/api/blob_storage_integrations/types/create_blob_storage_integration_request.py
@@ -7,8 +7,10 @@
 import typing_extensions
 from ...core.pydantic_utilities import UniversalBaseModel
 from ...core.serialization import FieldMetadata
+from .blob_storage_export_field_group import BlobStorageExportFieldGroup
 from .blob_storage_export_frequency import BlobStorageExportFrequency
 from .blob_storage_export_mode import BlobStorageExportMode
+from .blob_storage_export_source import BlobStorageExportSource
 from .blob_storage_integration_file_type import BlobStorageIntegrationFileType
 from .blob_storage_integration_type import BlobStorageIntegrationType
 
@@ -91,6 +93,29 @@ class CreateBlobStorageIntegrationRequest(UniversalBaseModel):
     Enable gzip compression for exported files (.csv.gz, .json.gz, .jsonl.gz). Defaults to true.
     """
 
+    export_source: typing_extensions.Annotated[
+        typing.Optional[BlobStorageExportSource], FieldMetadata(alias="exportSource")
+    ] = pydantic.Field(default=None)
+    """
+    Data to export. When omitted on update, the existing value is preserved. When omitted on create: pre-cutoff Cloud projects and self-hosted deployments fall back to `LEGACY_TRACES_OBSERVATIONS`; post-cutoff Cloud projects (created on or after 2026-05-20) auto-default to `OBSERVATIONS_V2`. Required when `exportFieldGroups` is provided.
+    
+    **Cloud-only deprecation gate (effective 2026-05-20):** For projects created on or after 2026-05-20 on Langfuse Cloud, `LEGACY_TRACES_OBSERVATIONS` and `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS` are rejected with HTTP 400. Omitting `exportSource` on these projects silently defaults to `OBSERVATIONS_V2` rather than the schema column default. Use `OBSERVATIONS_V2` for all new integrations. Projects created before 2026-05-20 and self-hosted deployments are unaffected.
+    """
+
+    export_field_groups: typing_extensions.Annotated[
+        typing.Optional[typing.List[BlobStorageExportFieldGroup]],
+        FieldMetadata(alias="exportFieldGroups"),
+    ] = pydantic.Field(default=None)
+    """
+    Field groups to include in each exported row.
+    
+    For exportSource `OBSERVATIONS_V2` or `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS`: must include `core` if provided. When omitted on create, the column default (all groups) applies. When omitted on update, the existing value is preserved.
+    
+    For exportSource `LEGACY_TRACES_OBSERVATIONS`: this field must be omitted or null. Sending an array (including an empty array) returns 400, because that source uses a fixed column set and does not honor field groups.
+    
+    `exportFieldGroups` requires `exportSource` to be provided in the same request.
+    """
+
     model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
         extra="allow", frozen=True
     )
diff --git a/langfuse/api/client.py b/langfuse/api/client.py
index 3f656cdcd..c0413704b 100644
--- a/langfuse/api/client.py
+++ b/langfuse/api/client.py
@@ -41,6 +41,7 @@
     from .scores.client import AsyncScoresClient, ScoresClient
     from .sessions.client import AsyncSessionsClient, SessionsClient
     from .trace.client import AsyncTraceClient, TraceClient
+    from .unstable.client import AsyncUnstableClient, UnstableClient
 
 
 class LangfuseAPI:
@@ -147,6 +148,7 @@ def __init__(
         self._scores: typing.Optional[ScoresClient] = None
         self._sessions: typing.Optional[SessionsClient] = None
         self._trace: typing.Optional[TraceClient] = None
+        self._unstable: typing.Optional[UnstableClient] = None
 
     @property
     def annotation_queues(self):
@@ -358,6 +360,14 @@ def trace(self):
             self._trace = TraceClient(client_wrapper=self._client_wrapper)
         return self._trace
 
+    @property
+    def unstable(self):
+        if self._unstable is not None:  # built on an earlier access; reuse it
+            return self._unstable
+        from .unstable.client import UnstableClient  # noqa: E402
+        self._unstable = UnstableClient(client_wrapper=self._client_wrapper)
+        return self._unstable
+
 
 class AsyncLangfuseAPI:
     """
@@ -463,6 +473,7 @@ def __init__(
         self._scores: typing.Optional[AsyncScoresClient] = None
         self._sessions: typing.Optional[AsyncSessionsClient] = None
         self._trace: typing.Optional[AsyncTraceClient] = None
+        self._unstable: typing.Optional[AsyncUnstableClient] = None
 
     @property
     def annotation_queues(self):
@@ -677,3 +688,11 @@ def trace(self):
 
             self._trace = AsyncTraceClient(client_wrapper=self._client_wrapper)
         return self._trace
+
+    @property
+    def unstable(self):
+        if self._unstable is not None:  # built on an earlier access; reuse it
+            return self._unstable
+        from .unstable.client import AsyncUnstableClient  # noqa: E402
+        self._unstable = AsyncUnstableClient(client_wrapper=self._client_wrapper)
+        return self._unstable
diff --git a/langfuse/api/commons/types/observation_v2.py b/langfuse/api/commons/types/observation_v2.py
index 149dfb422..08c1604cf 100644
--- a/langfuse/api/commons/types/observation_v2.py
+++ b/langfuse/api/commons/types/observation_v2.py
@@ -190,6 +190,13 @@ class ObservationV2(UniversalBaseModel):
     The total cost of the observation in USD
     """
 
+    usage_pricing_tier_name: typing_extensions.Annotated[
+        typing.Optional[str], FieldMetadata(alias="usagePricingTierName")
+    ] = pydantic.Field(default=None)
+    """
+    The name of the pricing tier applied to this observation's usage costs
+    """
+
     prompt_id: typing_extensions.Annotated[
         typing.Optional[str], FieldMetadata(alias="promptId")
     ] = pydantic.Field(default=None)
@@ -227,7 +234,45 @@ class ObservationV2(UniversalBaseModel):
         typing.Optional[str], FieldMetadata(alias="modelId")
     ] = pydantic.Field(default=None)
     """
-    The matched model ID
+    The matched model ID. Null when the `model` field group is not requested.
+    """
+
+    input_price: typing_extensions.Annotated[
+        typing.Optional[str], FieldMetadata(alias="inputPrice")
+    ] = pydantic.Field(default=None)
+    """
+    The input token price (USD per unit) from the matched model, serialized as a decimal string (e.g. "0.0001"). Null when the `model` field group is not requested.
+    """
+
+    output_price: typing_extensions.Annotated[
+        typing.Optional[str], FieldMetadata(alias="outputPrice")
+    ] = pydantic.Field(default=None)
+    """
+    The output token price (USD per unit) from the matched model, serialized as a decimal string (e.g. "0.0001"). Null when the `model` field group is not requested.
+    """
+
+    total_price: typing_extensions.Annotated[
+        typing.Optional[str], FieldMetadata(alias="totalPrice")
+    ] = pydantic.Field(default=None)
+    """
+    The total token price (USD per unit) from the matched model, serialized as a decimal string (e.g. "0.0001"). Null when the `model` field group is not requested.
+    """
+
+    trace_name: typing_extensions.Annotated[
+        typing.Optional[str], FieldMetadata(alias="traceName")
+    ] = pydantic.Field(default=None)
+    """
+    The name of the parent trace
+    """
+
+    tags: typing.Optional[typing.List[str]] = pydantic.Field(default=None)
+    """
+    Tags from the parent trace (denormalized onto the observation)
+    """
+
+    release: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    The release version of the parent trace
     """
 
     model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
diff --git a/langfuse/api/legacy/__init__.py b/langfuse/api/legacy/__init__.py
index d91b42c2b..0a67d1c0c 100644
--- a/langfuse/api/legacy/__init__.py
+++ b/langfuse/api/legacy/__init__.py
@@ -9,10 +9,11 @@
     from . import metrics_v1, observations_v1, score_v1
     from .metrics_v1 import MetricsResponse
     from .observations_v1 import Observations, ObservationsViews
-    from .score_v1 import CreateScoreRequest, CreateScoreResponse
+    from .score_v1 import CreateScoreRequest, CreateScoreResponse, CreateScoreSource
 _dynamic_imports: typing.Dict[str, str] = {
     "CreateScoreRequest": ".score_v1",
     "CreateScoreResponse": ".score_v1",
+    "CreateScoreSource": ".score_v1",
     "MetricsResponse": ".metrics_v1",
     "Observations": ".observations_v1",
     "ObservationsViews": ".observations_v1",
@@ -52,6 +53,7 @@ def __dir__():
 __all__ = [
     "CreateScoreRequest",
     "CreateScoreResponse",
+    "CreateScoreSource",
     "MetricsResponse",
     "Observations",
     "ObservationsViews",
diff --git a/langfuse/api/legacy/score_v1/__init__.py b/langfuse/api/legacy/score_v1/__init__.py
index 3d0c7422a..4841a9656 100644
--- a/langfuse/api/legacy/score_v1/__init__.py
+++ b/langfuse/api/legacy/score_v1/__init__.py
@@ -6,10 +6,11 @@
 from importlib import import_module
 
 if typing.TYPE_CHECKING:
-    from .types import CreateScoreRequest, CreateScoreResponse
+    from .types import CreateScoreRequest, CreateScoreResponse, CreateScoreSource
 _dynamic_imports: typing.Dict[str, str] = {
     "CreateScoreRequest": ".types",
     "CreateScoreResponse": ".types",
+    "CreateScoreSource": ".types",
 }
 
 
@@ -40,4 +41,4 @@ def __dir__():
     return sorted(lazy_attrs)
 
 
-__all__ = ["CreateScoreRequest", "CreateScoreResponse"]
+__all__ = ["CreateScoreRequest", "CreateScoreResponse", "CreateScoreSource"]
diff --git a/langfuse/api/legacy/score_v1/client.py b/langfuse/api/legacy/score_v1/client.py
index 03ca8b836..60f118747 100644
--- a/langfuse/api/legacy/score_v1/client.py
+++ b/langfuse/api/legacy/score_v1/client.py
@@ -8,6 +8,7 @@
 from ...core.request_options import RequestOptions
 from .raw_client import AsyncRawScoreV1Client, RawScoreV1Client
 from .types.create_score_response import CreateScoreResponse
+from .types.create_score_source import CreateScoreSource
 
 # this is used as the default value for optional parameters
 OMIT = typing.cast(typing.Any, ...)
@@ -44,6 +45,7 @@ def create(
         queue_id: typing.Optional[str] = OMIT,
         data_type: typing.Optional[ScoreDataType] = OMIT,
         config_id: typing.Optional[str] = OMIT,
+        source: typing.Optional[CreateScoreSource] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> CreateScoreResponse:
         """
@@ -82,6 +84,9 @@ def create(
         config_id : typing.Optional[str]
             Reference a score config on a score. The unique langfuse identifier of a score config. When passing this field, the dataType and stringValue fields are automatically populated.
 
+        source : typing.Optional[CreateScoreSource]
+            The source of the score. Defaults to API. Set to ANNOTATION to prefill scores (e.g. from an LLM) for a human reviewer to verify in an annotation queue. When source is ANNOTATION, a configId is required unless dataType is CORRECTION. EVAL is reserved for internal evaluator outputs and is not accepted on this endpoint.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
@@ -120,6 +125,7 @@ def create(
             queue_id=queue_id,
             data_type=data_type,
             config_id=config_id,
+            source=source,
             request_options=request_options,
         )
         return _response.data
@@ -193,6 +199,7 @@ async def create(
         queue_id: typing.Optional[str] = OMIT,
         data_type: typing.Optional[ScoreDataType] = OMIT,
         config_id: typing.Optional[str] = OMIT,
+        source: typing.Optional[CreateScoreSource] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> CreateScoreResponse:
         """
@@ -231,6 +238,9 @@ async def create(
         config_id : typing.Optional[str]
             Reference a score config on a score. The unique langfuse identifier of a score config. When passing this field, the dataType and stringValue fields are automatically populated.
 
+        source : typing.Optional[CreateScoreSource]
+            The source of the score. Defaults to API. Set to ANNOTATION to prefill scores (e.g. from an LLM) for a human reviewer to verify in an annotation queue. When source is ANNOTATION, a configId is required unless dataType is CORRECTION. EVAL is reserved for internal evaluator outputs and is not accepted on this endpoint.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
@@ -277,6 +287,7 @@ async def main() -> None:
             queue_id=queue_id,
             data_type=data_type,
             config_id=config_id,
+            source=source,
             request_options=request_options,
         )
         return _response.data
diff --git a/langfuse/api/legacy/score_v1/raw_client.py b/langfuse/api/legacy/score_v1/raw_client.py
index 834560ec9..3dc0164e0 100644
--- a/langfuse/api/legacy/score_v1/raw_client.py
+++ b/langfuse/api/legacy/score_v1/raw_client.py
@@ -18,6 +18,7 @@
 from ...core.request_options import RequestOptions
 from ...core.serialization import convert_and_respect_annotation_metadata
 from .types.create_score_response import CreateScoreResponse
+from .types.create_score_source import CreateScoreSource
 
 # this is used as the default value for optional parameters
 OMIT = typing.cast(typing.Any, ...)
@@ -43,6 +44,7 @@ def create(
         queue_id: typing.Optional[str] = OMIT,
         data_type: typing.Optional[ScoreDataType] = OMIT,
         config_id: typing.Optional[str] = OMIT,
+        source: typing.Optional[CreateScoreSource] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> HttpResponse[CreateScoreResponse]:
         """
@@ -81,6 +83,9 @@ def create(
         config_id : typing.Optional[str]
             Reference a score config on a score. The unique langfuse identifier of a score config. When passing this field, the dataType and stringValue fields are automatically populated.
 
+        source : typing.Optional[CreateScoreSource]
+            The source of the score. Defaults to API. Set to ANNOTATION to prefill scores (e.g. from an LLM) for a human reviewer to verify in an annotation queue. When source is ANNOTATION, a configId is required unless dataType is CORRECTION. EVAL is reserved for internal evaluator outputs and is not accepted on this endpoint.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
@@ -107,6 +112,7 @@ def create(
                 "queueId": queue_id,
                 "dataType": data_type,
                 "configId": config_id,
+                "source": source,
             },
             request_options=request_options,
             omit=OMIT,
@@ -304,6 +310,7 @@ async def create(
         queue_id: typing.Optional[str] = OMIT,
         data_type: typing.Optional[ScoreDataType] = OMIT,
         config_id: typing.Optional[str] = OMIT,
+        source: typing.Optional[CreateScoreSource] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> AsyncHttpResponse[CreateScoreResponse]:
         """
@@ -342,6 +349,9 @@ async def create(
         config_id : typing.Optional[str]
             Reference a score config on a score. The unique langfuse identifier of a score config. When passing this field, the dataType and stringValue fields are automatically populated.
 
+        source : typing.Optional[CreateScoreSource]
+            The source of the score. Defaults to API. Set to ANNOTATION to prefill scores (e.g. from an LLM) for a human reviewer to verify in an annotation queue. When source is ANNOTATION, a configId is required unless dataType is CORRECTION. EVAL is reserved for internal evaluator outputs and is not accepted on this endpoint.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
@@ -368,6 +378,7 @@ async def create(
                 "queueId": queue_id,
                 "dataType": data_type,
                 "configId": config_id,
+                "source": source,
             },
             request_options=request_options,
             omit=OMIT,
diff --git a/langfuse/api/legacy/score_v1/types/__init__.py b/langfuse/api/legacy/score_v1/types/__init__.py
index 4a759a978..dde25cbb4 100644
--- a/langfuse/api/legacy/score_v1/types/__init__.py
+++ b/langfuse/api/legacy/score_v1/types/__init__.py
@@ -8,9 +8,11 @@
 if typing.TYPE_CHECKING:
     from .create_score_request import CreateScoreRequest
     from .create_score_response import CreateScoreResponse
+    from .create_score_source import CreateScoreSource
 _dynamic_imports: typing.Dict[str, str] = {
     "CreateScoreRequest": ".create_score_request",
     "CreateScoreResponse": ".create_score_response",
+    "CreateScoreSource": ".create_score_source",
 }
 
 
@@ -41,4 +43,4 @@ def __dir__():
     return sorted(lazy_attrs)
 
 
-__all__ = ["CreateScoreRequest", "CreateScoreResponse"]
+__all__ = ["CreateScoreRequest", "CreateScoreResponse", "CreateScoreSource"]
diff --git a/langfuse/api/legacy/score_v1/types/create_score_request.py b/langfuse/api/legacy/score_v1/types/create_score_request.py
index a0397bdfc..ef498fe6c 100644
--- a/langfuse/api/legacy/score_v1/types/create_score_request.py
+++ b/langfuse/api/legacy/score_v1/types/create_score_request.py
@@ -8,6 +8,7 @@
 from ....commons.types.score_data_type import ScoreDataType
 from ....core.pydantic_utilities import UniversalBaseModel
 from ....core.serialization import FieldMetadata
+from .create_score_source import CreateScoreSource
 
 
 class CreateScoreRequest(UniversalBaseModel):
@@ -70,6 +71,11 @@ class CreateScoreRequest(UniversalBaseModel):
     Reference a score config on a score. The unique langfuse identifier of a score config. When passing this field, the dataType and stringValue fields are automatically populated.
     """
 
+    source: typing.Optional[CreateScoreSource] = pydantic.Field(default=None)
+    """
+    The source of the score. Defaults to API. Set to ANNOTATION to prefill scores (e.g. from an LLM) for a human reviewer to verify in an annotation queue. When source is ANNOTATION, a configId is required unless dataType is CORRECTION. EVAL is reserved for internal evaluator outputs and is not accepted on this endpoint.
+    """
+
     model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
         extra="allow", frozen=True
     )
diff --git a/langfuse/api/legacy/score_v1/types/create_score_source.py b/langfuse/api/legacy/score_v1/types/create_score_source.py
new file mode 100644
index 000000000..7364efd61
--- /dev/null
+++ b/langfuse/api/legacy/score_v1/types/create_score_source.py
@@ -0,0 +1,28 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core import enum
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class CreateScoreSource(enum.StrEnum):
+    """
+    Score sources a caller may set when creating a score through the public
+    REST API. EVAL is deliberately not a member: it is reserved for internal
+    evaluator outputs. Use commons.ScoreSource when reading scores.
+    """
+
+    API = "API"
+    ANNOTATION = "ANNOTATION"
+
+    def visit(
+        self,
+        api: typing.Callable[[], T_Result],
+        annotation: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        """Call the callback that corresponds to this member and return its result."""
+        # Keyed by member value; .get() yields None (no call) if nothing matches.
+        handler = {"API": api, "ANNOTATION": annotation}.get(self.value)
+        return handler() if handler is not None else None
diff --git a/langfuse/api/llm_connections/__init__.py b/langfuse/api/llm_connections/__init__.py
index aba7157f1..e4edb011c 100644
--- a/langfuse/api/llm_connections/__init__.py
+++ b/langfuse/api/llm_connections/__init__.py
@@ -7,12 +7,14 @@
 
 if typing.TYPE_CHECKING:
     from .types import (
+        DeleteLlmConnectionResponse,
         LlmAdapter,
         LlmConnection,
         PaginatedLlmConnections,
         UpsertLlmConnectionRequest,
     )
 _dynamic_imports: typing.Dict[str, str] = {
+    "DeleteLlmConnectionResponse": ".types",
     "LlmAdapter": ".types",
     "LlmConnection": ".types",
     "PaginatedLlmConnections": ".types",
@@ -48,6 +50,7 @@ def __dir__():
 
 
 __all__ = [
+    "DeleteLlmConnectionResponse",
     "LlmAdapter",
     "LlmConnection",
     "PaginatedLlmConnections",
diff --git a/langfuse/api/llm_connections/client.py b/langfuse/api/llm_connections/client.py
index 213e55e9f..62c4293ff 100644
--- a/langfuse/api/llm_connections/client.py
+++ b/langfuse/api/llm_connections/client.py
@@ -5,6 +5,7 @@
 from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from ..core.request_options import RequestOptions
 from .raw_client import AsyncRawLlmConnectionsClient, RawLlmConnectionsClient
+from .types.delete_llm_connection_response import DeleteLlmConnectionResponse
 from .types.llm_adapter import LlmAdapter
 from .types.llm_connection import LlmConnection
 from .types.paginated_llm_connections import PaginatedLlmConnections
@@ -153,6 +154,42 @@ def upsert(
         )
         return _response.data
 
+    def delete(
+        self, id: str, *, request_options: typing.Optional[RequestOptions] = None
+    ) -> DeleteLlmConnectionResponse:
+        """
+        Delete the LLM connection with the given id. Evaluators that depend on the deleted connection are automatically paused.
+
+        Parameters
+        ----------
+        id : str
+            Identifier of the LLM connection to delete.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        DeleteLlmConnectionResponse
+
+        Examples
+        --------
+        from langfuse import LangfuseAPI
+
+        client = LangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+        client.llm_connections.delete(
+            id="id",
+        )
+        """
+        return self._raw_client.delete(id, request_options=request_options).data
+
 
 class AsyncLlmConnectionsClient:
     def __init__(self, *, client_wrapper: AsyncClientWrapper):
@@ -309,3 +346,47 @@ async def main() -> None:
             request_options=request_options,
         )
         return _response.data
+
+    async def delete(
+        self, id: str, *, request_options: typing.Optional[RequestOptions] = None
+    ) -> DeleteLlmConnectionResponse:
+        """
+        Remove the LLM connection with the given id. Evaluators relying on that connection are paused automatically.
+
+        Parameters
+        ----------
+        id : str
+            Identifier of the LLM connection to delete.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        DeleteLlmConnectionResponse
+
+        Examples
+        --------
+        import asyncio
+
+        from langfuse import AsyncLangfuseAPI
+
+        client = AsyncLangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+
+
+        async def main() -> None:
+            await client.llm_connections.delete(
+                id="id",
+            )
+
+
+        asyncio.run(main())
+        """
+        return (await self._raw_client.delete(id, request_options=request_options)).data
diff --git a/langfuse/api/llm_connections/raw_client.py b/langfuse/api/llm_connections/raw_client.py
index ef4f87425..30f7beebb 100644
--- a/langfuse/api/llm_connections/raw_client.py
+++ b/langfuse/api/llm_connections/raw_client.py
@@ -11,8 +11,10 @@
 from ..core.api_error import ApiError
 from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from ..core.http_response import AsyncHttpResponse, HttpResponse
+from ..core.jsonable_encoder import jsonable_encoder
 from ..core.pydantic_utilities import parse_obj_as
 from ..core.request_options import RequestOptions
+from .types.delete_llm_connection_response import DeleteLlmConnectionResponse
 from .types.llm_adapter import LlmAdapter
 from .types.llm_connection import LlmConnection
 from .types.paginated_llm_connections import PaginatedLlmConnections
@@ -280,6 +282,106 @@ def upsert(
             body=_response_json,
         )
 
+    def delete(
+        self, id: str, *, request_options: typing.Optional[RequestOptions] = None
+    ) -> HttpResponse[DeleteLlmConnectionResponse]:
+        """
+        Delete an LLM connection by id. Evaluators that depend on the deleted connection are automatically paused. Responses with status 400, 401, 403, 404 or 405 raise the matching typed error; any other non-2xx status, or a body that cannot be decoded as JSON, raises ApiError.
+
+        Parameters
+        ----------
+        id : str
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        HttpResponse[DeleteLlmConnectionResponse]
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            f"api/public/llm-connections/{jsonable_encoder(id)}",  # id forms the last path segment
+            method="DELETE",
+            request_options=request_options,
+        )
+        try:
+            if 200 <= _response.status_code < 300:  # success: parse body into the typed response
+                _data = typing.cast(
+                    DeleteLlmConnectionResponse,
+                    parse_obj_as(
+                        type_=DeleteLlmConnectionResponse,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return HttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:
+                raise NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()  # unmapped status: body goes into the ApiError below
+        except JSONDecodeError:  # JSON decoding failed above: report the raw text instead
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
+
 
 class AsyncRawLlmConnectionsClient:
     def __init__(self, *, client_wrapper: AsyncClientWrapper):
@@ -539,3 +641,103 @@ async def upsert(
             headers=dict(_response.headers),
             body=_response_json,
         )
+
+    async def delete(
+        self, id: str, *, request_options: typing.Optional[RequestOptions] = None
+    ) -> AsyncHttpResponse[DeleteLlmConnectionResponse]:
+        """
+        Delete an LLM connection by id. Evaluators that depend on the deleted connection are automatically paused. Responses with status 400, 401, 403, 404 or 405 raise the matching typed error; any other non-2xx status, or a body that cannot be decoded as JSON, raises ApiError.
+
+        Parameters
+        ----------
+        id : str
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        AsyncHttpResponse[DeleteLlmConnectionResponse]
+        """
+        _response = await self._client_wrapper.httpx_client.request(
+            f"api/public/llm-connections/{jsonable_encoder(id)}",  # id forms the last path segment
+            method="DELETE",
+            request_options=request_options,
+        )
+        try:
+            if 200 <= _response.status_code < 300:  # success: parse body into the typed response
+                _data = typing.cast(
+                    DeleteLlmConnectionResponse,
+                    parse_obj_as(
+                        type_=DeleteLlmConnectionResponse,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return AsyncHttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:
+                raise NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()  # unmapped status: body goes into the ApiError below
+        except JSONDecodeError:  # JSON decoding failed above: report the raw text instead
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
diff --git a/langfuse/api/llm_connections/types/__init__.py b/langfuse/api/llm_connections/types/__init__.py
index e6ba89200..ab24fc400 100644
--- a/langfuse/api/llm_connections/types/__init__.py
+++ b/langfuse/api/llm_connections/types/__init__.py
@@ -6,11 +6,13 @@
 from importlib import import_module
 
 if typing.TYPE_CHECKING:
+    from .delete_llm_connection_response import DeleteLlmConnectionResponse
     from .llm_adapter import LlmAdapter
     from .llm_connection import LlmConnection
     from .paginated_llm_connections import PaginatedLlmConnections
     from .upsert_llm_connection_request import UpsertLlmConnectionRequest
 _dynamic_imports: typing.Dict[str, str] = {
+    "DeleteLlmConnectionResponse": ".delete_llm_connection_response",
     "LlmAdapter": ".llm_adapter",
     "LlmConnection": ".llm_connection",
     "PaginatedLlmConnections": ".paginated_llm_connections",
@@ -46,6 +48,7 @@ def __dir__():
 
 
 __all__ = [
+    "DeleteLlmConnectionResponse",
     "LlmAdapter",
     "LlmConnection",
     "PaginatedLlmConnections",
diff --git a/langfuse/api/llm_connections/types/delete_llm_connection_response.py b/langfuse/api/llm_connections/types/delete_llm_connection_response.py
new file mode 100644
index 000000000..080a1904c
--- /dev/null
+++ b/langfuse/api/llm_connections/types/delete_llm_connection_response.py
@@ -0,0 +1,14 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ...core.pydantic_utilities import UniversalBaseModel
+
+
+class DeleteLlmConnectionResponse(UniversalBaseModel):
+    """Body returned by the delete-LLM-connection endpoint."""
+
+    message: str
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = {"extra": "allow", "frozen": True}
diff --git a/langfuse/api/observations/client.py b/langfuse/api/observations/client.py
index ce0de0cf2..ff7069ede 100644
--- a/langfuse/api/observations/client.py
+++ b/langfuse/api/observations/client.py
@@ -62,9 +62,10 @@ def get_many(
         - `io` - input, output
         - `metadata` - metadata (truncated to 200 chars by default, use `expandMetadata` to get full values)
         - `model` - providedModelName, internalModelId, modelParameters
-        - `usage` - usageDetails, costDetails, totalCost
+        - `usage` - usageDetails, costDetails, totalCost, usagePricingTierName
         - `prompt` - promptId, promptName, promptVersion
         - `metrics` - latency, timeToFirstToken
+        - `trace_context` - tags, release, traceName
 
         If not specified, `core` and `basic` field groups are returned.
 
@@ -76,7 +77,7 @@ def get_many(
         ----------
         fields : typing.Optional[str]
             Comma-separated list of field groups to include in the response.
-            Available groups: core, basic, time, io, metadata, model, usage, prompt, metrics.
+            Available groups: core, basic, time, io, metadata, model, usage, prompt, metrics, trace_context.
             If not specified, `core` and `basic` field groups are returned.
             Example: "basic,usage,model"
 
@@ -135,12 +136,12 @@ def get_many(
                 "column": string,         // Required. Column to filter on (see available columns below)
                 "operator": string,       // Required. Operator based on type:
                                           // - datetime: ">", "<", ">=", "<="
-                                          // - string: "=", "contains", "does not contain", "starts with", "ends with"
+                                          // - string: "=", "contains", "does not contain", "starts with", "ends with", "matches"
                                           // - stringOptions: "any of", "none of"
                                           // - categoryOptions: "any of", "none of"
                                           // - arrayOptions: "any of", "none of", "all of"
                                           // - number: "=", ">", "<", ">=", "<="
-                                          // - stringObject: "=", "contains", "does not contain", "starts with", "ends with"
+                                          // - stringObject: "=", "contains", "does not contain", "starts with", "ends with", "matches"
                                           // - numberObject: "=", ">", "<", ">=", "<="
                                           // - boolean: "=", "<>"
                                           // - null: "is null", "is not null"
@@ -192,8 +193,12 @@ def get_many(
             - `promptVersion` (number) - Associated prompt version
 
             ### Structured Data
+            - `input` (string) - Observation input. Supports accelerated token search with the `matches` operator.
+            - `output` (string) - Observation output. Supports accelerated token search with the `matches` operator.
             - `metadata` (stringObject/numberObject/categoryOptions) - Metadata key-value pairs. Use `key` parameter to filter on specific metadata keys.
 
+            The `matches` operator is only supported for `input`, `output`, and stringObject `metadata` filters. It performs token-based full-text search using the events table text indexes. Case sensitivity differs by target: `input` and `output` matches are case-insensitive, while metadata value matches are case-sensitive. Use `contains` for substring semantics. Any v2 `input` or `output` filter must be accompanied by at least one `=` or `matches` filter on `input` or `output`; standalone `contains`, `starts with`, `ends with`, and `does not contain` filters on these columns are rejected.
+
             ## Filter Examples
             ```json
             [
@@ -215,6 +220,12 @@ def get_many(
                 "key": "environment",
                 "operator": "=",
                 "value": "production"
+              },
+              {
+                "type": "string",
+                "column": "output",
+                "operator": "matches",
+                "value": "needle"
               }
             ]
             ```
@@ -314,9 +325,10 @@ async def get_many(
         - `io` - input, output
         - `metadata` - metadata (truncated to 200 chars by default, use `expandMetadata` to get full values)
         - `model` - providedModelName, internalModelId, modelParameters
-        - `usage` - usageDetails, costDetails, totalCost
+        - `usage` - usageDetails, costDetails, totalCost, usagePricingTierName
         - `prompt` - promptId, promptName, promptVersion
         - `metrics` - latency, timeToFirstToken
+        - `trace_context` - tags, release, traceName
 
         If not specified, `core` and `basic` field groups are returned.
 
@@ -328,7 +340,7 @@ async def get_many(
         ----------
         fields : typing.Optional[str]
             Comma-separated list of field groups to include in the response.
-            Available groups: core, basic, time, io, metadata, model, usage, prompt, metrics.
+            Available groups: core, basic, time, io, metadata, model, usage, prompt, metrics, trace_context.
             If not specified, `core` and `basic` field groups are returned.
             Example: "basic,usage,model"
 
@@ -387,12 +399,12 @@ async def get_many(
                 "column": string,         // Required. Column to filter on (see available columns below)
                 "operator": string,       // Required. Operator based on type:
                                           // - datetime: ">", "<", ">=", "<="
-                                          // - string: "=", "contains", "does not contain", "starts with", "ends with"
+                                          // - string: "=", "contains", "does not contain", "starts with", "ends with", "matches"
                                           // - stringOptions: "any of", "none of"
                                           // - categoryOptions: "any of", "none of"
                                           // - arrayOptions: "any of", "none of", "all of"
                                           // - number: "=", ">", "<", ">=", "<="
-                                          // - stringObject: "=", "contains", "does not contain", "starts with", "ends with"
+                                          // - stringObject: "=", "contains", "does not contain", "starts with", "ends with", "matches"
                                           // - numberObject: "=", ">", "<", ">=", "<="
                                           // - boolean: "=", "<>"
                                           // - null: "is null", "is not null"
@@ -444,8 +456,12 @@ async def get_many(
             - `promptVersion` (number) - Associated prompt version
 
             ### Structured Data
+            - `input` (string) - Observation input. Supports accelerated token search with the `matches` operator.
+            - `output` (string) - Observation output. Supports accelerated token search with the `matches` operator.
             - `metadata` (stringObject/numberObject/categoryOptions) - Metadata key-value pairs. Use `key` parameter to filter on specific metadata keys.
 
+            The `matches` operator is only supported for `input`, `output`, and stringObject `metadata` filters. It performs token-based full-text search using the events table text indexes. Case sensitivity differs by target: `input` and `output` matches are case-insensitive, while metadata value matches are case-sensitive. Use `contains` for substring semantics. Any v2 `input` or `output` filter must be accompanied by at least one `=` or `matches` filter on `input` or `output`; standalone `contains`, `starts with`, `ends with`, and `does not contain` filters on these columns are rejected.
+
             ## Filter Examples
             ```json
             [
@@ -467,6 +483,12 @@ async def get_many(
                 "key": "environment",
                 "operator": "=",
                 "value": "production"
+              },
+              {
+                "type": "string",
+                "column": "output",
+                "operator": "matches",
+                "value": "needle"
               }
             ]
             ```
diff --git a/langfuse/api/observations/raw_client.py b/langfuse/api/observations/raw_client.py
index 3ae8eab15..f6502014e 100644
--- a/langfuse/api/observations/raw_client.py
+++ b/langfuse/api/observations/raw_client.py
@@ -60,9 +60,10 @@ def get_many(
         - `io` - input, output
         - `metadata` - metadata (truncated to 200 chars by default, use `expandMetadata` to get full values)
         - `model` - providedModelName, internalModelId, modelParameters
-        - `usage` - usageDetails, costDetails, totalCost
+        - `usage` - usageDetails, costDetails, totalCost, usagePricingTierName
         - `prompt` - promptId, promptName, promptVersion
         - `metrics` - latency, timeToFirstToken
+        - `trace_context` - tags, release, traceName
 
         If not specified, `core` and `basic` field groups are returned.
 
@@ -74,7 +75,7 @@ def get_many(
         ----------
         fields : typing.Optional[str]
             Comma-separated list of field groups to include in the response.
-            Available groups: core, basic, time, io, metadata, model, usage, prompt, metrics.
+            Available groups: core, basic, time, io, metadata, model, usage, prompt, metrics, trace_context.
             If not specified, `core` and `basic` field groups are returned.
             Example: "basic,usage,model"
 
@@ -133,12 +134,12 @@ def get_many(
                 "column": string,         // Required. Column to filter on (see available columns below)
                 "operator": string,       // Required. Operator based on type:
                                           // - datetime: ">", "<", ">=", "<="
-                                          // - string: "=", "contains", "does not contain", "starts with", "ends with"
+                                          // - string: "=", "contains", "does not contain", "starts with", "ends with", "matches"
                                           // - stringOptions: "any of", "none of"
                                           // - categoryOptions: "any of", "none of"
                                           // - arrayOptions: "any of", "none of", "all of"
                                           // - number: "=", ">", "<", ">=", "<="
-                                          // - stringObject: "=", "contains", "does not contain", "starts with", "ends with"
+                                          // - stringObject: "=", "contains", "does not contain", "starts with", "ends with", "matches"
                                           // - numberObject: "=", ">", "<", ">=", "<="
                                           // - boolean: "=", "<>"
                                           // - null: "is null", "is not null"
@@ -190,8 +191,12 @@ def get_many(
             - `promptVersion` (number) - Associated prompt version
 
             ### Structured Data
+            - `input` (string) - Observation input. Supports accelerated token search with the `matches` operator.
+            - `output` (string) - Observation output. Supports accelerated token search with the `matches` operator.
             - `metadata` (stringObject/numberObject/categoryOptions) - Metadata key-value pairs. Use `key` parameter to filter on specific metadata keys.
 
+            The `matches` operator is only supported for `input`, `output`, and stringObject `metadata` filters. It performs token-based full-text search using the events table text indexes. Case sensitivity differs by target: `input` and `output` matches are case-insensitive, while metadata value matches are case-sensitive. Use `contains` for substring semantics. Any v2 `input` or `output` filter must be accompanied by at least one `=` or `matches` filter on `input` or `output`; standalone `contains`, `starts with`, `ends with`, and `does not contain` filters on these columns are rejected.
+
             ## Filter Examples
             ```json
             [
@@ -213,6 +218,12 @@ def get_many(
                 "key": "environment",
                 "operator": "=",
                 "value": "production"
+              },
+              {
+                "type": "string",
+                "column": "output",
+                "operator": "matches",
+                "value": "needle"
               }
             ]
             ```
@@ -371,9 +382,10 @@ async def get_many(
         - `io` - input, output
         - `metadata` - metadata (truncated to 200 chars by default, use `expandMetadata` to get full values)
         - `model` - providedModelName, internalModelId, modelParameters
-        - `usage` - usageDetails, costDetails, totalCost
+        - `usage` - usageDetails, costDetails, totalCost, usagePricingTierName
         - `prompt` - promptId, promptName, promptVersion
         - `metrics` - latency, timeToFirstToken
+        - `trace_context` - tags, release, traceName
 
         If not specified, `core` and `basic` field groups are returned.
 
@@ -385,7 +397,7 @@ async def get_many(
         ----------
         fields : typing.Optional[str]
             Comma-separated list of field groups to include in the response.
-            Available groups: core, basic, time, io, metadata, model, usage, prompt, metrics.
+            Available groups: core, basic, time, io, metadata, model, usage, prompt, metrics, trace_context.
             If not specified, `core` and `basic` field groups are returned.
             Example: "basic,usage,model"
 
@@ -444,12 +456,12 @@ async def get_many(
                 "column": string,         // Required. Column to filter on (see available columns below)
                 "operator": string,       // Required. Operator based on type:
                                           // - datetime: ">", "<", ">=", "<="
-                                          // - string: "=", "contains", "does not contain", "starts with", "ends with"
+                                          // - string: "=", "contains", "does not contain", "starts with", "ends with", "matches"
                                           // - stringOptions: "any of", "none of"
                                           // - categoryOptions: "any of", "none of"
                                           // - arrayOptions: "any of", "none of", "all of"
                                           // - number: "=", ">", "<", ">=", "<="
-                                          // - stringObject: "=", "contains", "does not contain", "starts with", "ends with"
+                                          // - stringObject: "=", "contains", "does not contain", "starts with", "ends with", "matches"
                                           // - numberObject: "=", ">", "<", ">=", "<="
                                           // - boolean: "=", "<>"
                                           // - null: "is null", "is not null"
@@ -501,8 +513,12 @@ async def get_many(
             - `promptVersion` (number) - Associated prompt version
 
             ### Structured Data
+            - `input` (string) - Observation input. Supports accelerated token search with the `matches` operator.
+            - `output` (string) - Observation output. Supports accelerated token search with the `matches` operator.
             - `metadata` (stringObject/numberObject/categoryOptions) - Metadata key-value pairs. Use `key` parameter to filter on specific metadata keys.
 
+            The `matches` operator is only supported for `input`, `output`, and stringObject `metadata` filters. It performs token-based full-text search using the events table text indexes. Case sensitivity differs by target: `input` and `output` matches are case-insensitive, while metadata value matches are case-sensitive. Use `contains` for substring semantics. Any v2 `input` or `output` filter must be accompanied by at least one `=` or `matches` filter on `input` or `output`; standalone `contains`, `starts with`, `ends with`, and `does not contain` filters on these columns are rejected.
+
             ## Filter Examples
             ```json
             [
@@ -524,6 +540,12 @@ async def get_many(
                 "key": "environment",
                 "operator": "=",
                 "value": "production"
+              },
+              {
+                "type": "string",
+                "column": "output",
+                "operator": "matches",
+                "value": "needle"
               }
             ]
             ```
diff --git a/langfuse/api/score_configs/client.py b/langfuse/api/score_configs/client.py
index da6626043..b900e3f5d 100644
--- a/langfuse/api/score_configs/client.py
+++ b/langfuse/api/score_configs/client.py
@@ -46,6 +46,7 @@ def create(
         Parameters
         ----------
         name : str
+            Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed.
 
         data_type : ScoreConfigDataType
 
@@ -204,7 +205,7 @@ def update(
             The status of the score config showing if it is archived or not
 
         name : typing.Optional[str]
-            The name of the score config
+            Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed.
 
         categories : typing.Optional[typing.Sequence[ConfigCategory]]
             Configure custom categories for categorical scores. Pass a list of objects with `label` and `value` properties. Categories are autogenerated for boolean configs and cannot be passed
@@ -286,6 +287,7 @@ async def create(
         Parameters
         ----------
         name : str
+            Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed.
 
         data_type : ScoreConfigDataType
 
@@ -468,7 +470,7 @@ async def update(
             The status of the score config showing if it is archived or not
 
         name : typing.Optional[str]
-            The name of the score config
+            Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed.
 
         categories : typing.Optional[typing.Sequence[ConfigCategory]]
             Configure custom categories for categorical scores. Pass a list of objects with `label` and `value` properties. Categories are autogenerated for boolean configs and cannot be passed
diff --git a/langfuse/api/score_configs/raw_client.py b/langfuse/api/score_configs/raw_client.py
index 8021940c6..11de026c6 100644
--- a/langfuse/api/score_configs/raw_client.py
+++ b/langfuse/api/score_configs/raw_client.py
@@ -45,6 +45,7 @@ def create(
         Parameters
         ----------
         name : str
+            Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed.
 
         data_type : ScoreConfigDataType
 
@@ -400,7 +401,7 @@ def update(
             The status of the score config showing if it is archived or not
 
         name : typing.Optional[str]
-            The name of the score config
+            Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed.
 
         categories : typing.Optional[typing.Sequence[ConfigCategory]]
             Configure custom categories for categorical scores. Pass a list of objects with `label` and `value` properties. Categories are autogenerated for boolean configs and cannot be passed
@@ -539,6 +540,7 @@ async def create(
         Parameters
         ----------
         name : str
+            Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed.
 
         data_type : ScoreConfigDataType
 
@@ -894,7 +896,7 @@ async def update(
             The status of the score config showing if it is archived or not
 
         name : typing.Optional[str]
-            The name of the score config
+            Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed.
 
         categories : typing.Optional[typing.Sequence[ConfigCategory]]
             Configure custom categories for categorical scores. Pass a list of objects with `label` and `value` properties. Categories are autogenerated for boolean configs and cannot be passed
diff --git a/langfuse/api/score_configs/types/create_score_config_request.py b/langfuse/api/score_configs/types/create_score_config_request.py
index 1c23fd91e..0edb01407 100644
--- a/langfuse/api/score_configs/types/create_score_config_request.py
+++ b/langfuse/api/score_configs/types/create_score_config_request.py
@@ -11,7 +11,11 @@
 
 
 class CreateScoreConfigRequest(UniversalBaseModel):
-    name: str
+    name: str = pydantic.Field()
+    """
+    Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed.
+    """
+
     data_type: typing_extensions.Annotated[
         ScoreConfigDataType, FieldMetadata(alias="dataType")
     ]
diff --git a/langfuse/api/score_configs/types/update_score_config_request.py b/langfuse/api/score_configs/types/update_score_config_request.py
index 5237c544f..28c4248e9 100644
--- a/langfuse/api/score_configs/types/update_score_config_request.py
+++ b/langfuse/api/score_configs/types/update_score_config_request.py
@@ -19,7 +19,7 @@ class UpdateScoreConfigRequest(UniversalBaseModel):
 
     name: typing.Optional[str] = pydantic.Field(default=None)
     """
-    The name of the score config
+    Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed.
     """
 
     categories: typing.Optional[typing.List[ConfigCategory]] = pydantic.Field(
diff --git a/langfuse/api/scores/client.py b/langfuse/api/scores/client.py
index 91db2c416..566530e21 100644
--- a/langfuse/api/scores/client.py
+++ b/langfuse/api/scores/client.py
@@ -62,7 +62,7 @@ def get_many(
             Page number, starts at 1.
 
         limit : typing.Optional[int]
-            Limit of items per page. If you encounter api issues due to too large page sizes, try to reduce the limit.
+            Limit of items per page. Maximum 100. Defaults to 50. Requests with a limit greater than 100 return HTTP 400. If you encounter api issues due to too large page sizes, try to reduce the limit.
 
         user_id : typing.Optional[str]
             Retrieve only scores with this userId associated to the trace.
@@ -258,7 +258,7 @@ async def get_many(
             Page number, starts at 1.
 
         limit : typing.Optional[int]
-            Limit of items per page. If you encounter api issues due to too large page sizes, try to reduce the limit.
+            Limit of items per page. Maximum 100. Defaults to 50. Requests with a limit greater than 100 return HTTP 400. If you encounter api issues due to too large page sizes, try to reduce the limit.
 
         user_id : typing.Optional[str]
             Retrieve only scores with this userId associated to the trace.
diff --git a/langfuse/api/scores/raw_client.py b/langfuse/api/scores/raw_client.py
index 2dc16e688..d1508545c 100644
--- a/langfuse/api/scores/raw_client.py
+++ b/langfuse/api/scores/raw_client.py
@@ -61,7 +61,7 @@ def get_many(
             Page number, starts at 1.
 
         limit : typing.Optional[int]
-            Limit of items per page. If you encounter api issues due to too large page sizes, try to reduce the limit.
+            Limit of items per page. Maximum 100. Defaults to 50. Requests with a limit greater than 100 return HTTP 400. If you encounter api issues due to too large page sizes, try to reduce the limit.
 
         user_id : typing.Optional[str]
             Retrieve only scores with this userId associated to the trace.
@@ -378,7 +378,7 @@ async def get_many(
             Page number, starts at 1.
 
         limit : typing.Optional[int]
-            Limit of items per page. If you encounter api issues due to too large page sizes, try to reduce the limit.
+            Limit of items per page. Maximum 100. Defaults to 50. Requests with a limit greater than 100 return HTTP 400. If you encounter api issues due to too large page sizes, try to reduce the limit.
 
         user_id : typing.Optional[str]
             Retrieve only scores with this userId associated to the trace.
diff --git a/langfuse/api/unstable/__init__.py b/langfuse/api/unstable/__init__.py
new file mode 100644
index 000000000..75aafdc24
--- /dev/null
+++ b/langfuse/api/unstable/__init__.py
@@ -0,0 +1,267 @@
+# This file was auto-generated by Fern from our API Definition.
+
+# isort: skip_file
+
+import typing
+from importlib import import_module
+
+if typing.TYPE_CHECKING:
+    from .errors import (
+        AccessDeniedError,
+        BadRequestError,
+        ConflictError,
+        InternalServerError,
+        MethodNotAllowedError,
+        NotFoundError,
+        PublicApiError,
+        PublicApiErrorCode,
+        PublicApiErrorDetails,
+        PublicApiValidationIssue,
+        TooManyRequestsError,
+        UnauthorizedError,
+        UnprocessableContentError,
+    )
+    from . import commons, errors, evaluation_rules, evaluators
+    from .commons import (
+        ArrayOptionsEvaluationRuleFilter,
+        BooleanEvaluationRuleFilter,
+        CategoryOptionsEvaluationRuleFilter,
+        DateTimeEvaluationRuleFilter,
+        EvaluationRuleArrayOptionsFilterOperator,
+        EvaluationRuleBooleanFilterOperator,
+        EvaluationRuleFilter,
+        EvaluationRuleFilter_ArrayOptions,
+        EvaluationRuleFilter_Boolean,
+        EvaluationRuleFilter_CategoryOptions,
+        EvaluationRuleFilter_Datetime,
+        EvaluationRuleFilter_Null,
+        EvaluationRuleFilter_Number,
+        EvaluationRuleFilter_NumberObject,
+        EvaluationRuleFilter_String,
+        EvaluationRuleFilter_StringObject,
+        EvaluationRuleFilter_StringOptions,
+        EvaluationRuleMapping,
+        EvaluationRuleMappingSource,
+        EvaluationRuleNullFilterOperator,
+        EvaluationRuleNumberFilterOperator,
+        EvaluationRuleOptionsFilterOperator,
+        EvaluationRuleStatus,
+        EvaluationRuleStringFilterOperator,
+        EvaluationRuleTarget,
+        EvaluatorModelConfig,
+        EvaluatorOutputDataType,
+        EvaluatorOutputDefinition,
+        EvaluatorOutputDefinition_Boolean,
+        EvaluatorOutputDefinition_Categorical,
+        EvaluatorOutputDefinition_Numeric,
+        EvaluatorOutputFieldDefinition,
+        EvaluatorScope,
+        EvaluatorType,
+        NullEvaluationRuleFilter,
+        NumberEvaluationRuleFilter,
+        NumberObjectEvaluationRuleFilter,
+        PublicBooleanEvaluatorOutputDefinition,
+        PublicCategoricalEvaluatorOutputDefinition,
+        PublicCategoricalEvaluatorOutputScoreDefinition,
+        PublicEvaluatorOutputDefinition,
+        PublicEvaluatorOutputDefinition_Boolean,
+        PublicEvaluatorOutputDefinition_Categorical,
+        PublicEvaluatorOutputDefinition_Numeric,
+        PublicNumericEvaluatorOutputDefinition,
+        StringEvaluationRuleFilter,
+        StringObjectEvaluationRuleFilter,
+        StringOptionsEvaluationRuleFilter,
+    )
+    from .evaluation_rules import (
+        CreateEvaluationRuleRequest,
+        DeleteEvaluationRuleResponse,
+        EvaluationRule,
+        EvaluationRuleEvaluator,
+        EvaluationRuleEvaluatorReference,
+        EvaluationRules,
+        UpdateEvaluationRuleRequest,
+    )
+    from .evaluators import CreateEvaluatorRequest, Evaluator, Evaluators
+_dynamic_imports: typing.Dict[str, str] = {
+    "AccessDeniedError": ".errors",
+    "ArrayOptionsEvaluationRuleFilter": ".commons",
+    "BadRequestError": ".errors",
+    "BooleanEvaluationRuleFilter": ".commons",
+    "CategoryOptionsEvaluationRuleFilter": ".commons",
+    "ConflictError": ".errors",
+    "CreateEvaluationRuleRequest": ".evaluation_rules",
+    "CreateEvaluatorRequest": ".evaluators",
+    "DateTimeEvaluationRuleFilter": ".commons",
+    "DeleteEvaluationRuleResponse": ".evaluation_rules",
+    "EvaluationRule": ".evaluation_rules",
+    "EvaluationRuleArrayOptionsFilterOperator": ".commons",
+    "EvaluationRuleBooleanFilterOperator": ".commons",
+    "EvaluationRuleEvaluator": ".evaluation_rules",
+    "EvaluationRuleEvaluatorReference": ".evaluation_rules",
+    "EvaluationRuleFilter": ".commons",
+    "EvaluationRuleFilter_ArrayOptions": ".commons",
+    "EvaluationRuleFilter_Boolean": ".commons",
+    "EvaluationRuleFilter_CategoryOptions": ".commons",
+    "EvaluationRuleFilter_Datetime": ".commons",
+    "EvaluationRuleFilter_Null": ".commons",
+    "EvaluationRuleFilter_Number": ".commons",
+    "EvaluationRuleFilter_NumberObject": ".commons",
+    "EvaluationRuleFilter_String": ".commons",
+    "EvaluationRuleFilter_StringObject": ".commons",
+    "EvaluationRuleFilter_StringOptions": ".commons",
+    "EvaluationRuleMapping": ".commons",
+    "EvaluationRuleMappingSource": ".commons",
+    "EvaluationRuleNullFilterOperator": ".commons",
+    "EvaluationRuleNumberFilterOperator": ".commons",
+    "EvaluationRuleOptionsFilterOperator": ".commons",
+    "EvaluationRuleStatus": ".commons",
+    "EvaluationRuleStringFilterOperator": ".commons",
+    "EvaluationRuleTarget": ".commons",
+    "EvaluationRules": ".evaluation_rules",
+    "Evaluator": ".evaluators",
+    "EvaluatorModelConfig": ".commons",
+    "EvaluatorOutputDataType": ".commons",
+    "EvaluatorOutputDefinition": ".commons",
+    "EvaluatorOutputDefinition_Boolean": ".commons",
+    "EvaluatorOutputDefinition_Categorical": ".commons",
+    "EvaluatorOutputDefinition_Numeric": ".commons",
+    "EvaluatorOutputFieldDefinition": ".commons",
+    "EvaluatorScope": ".commons",
+    "EvaluatorType": ".commons",
+    "Evaluators": ".evaluators",
+    "InternalServerError": ".errors",
+    "MethodNotAllowedError": ".errors",
+    "NotFoundError": ".errors",
+    "NullEvaluationRuleFilter": ".commons",
+    "NumberEvaluationRuleFilter": ".commons",
+    "NumberObjectEvaluationRuleFilter": ".commons",
+    "PublicApiError": ".errors",
+    "PublicApiErrorCode": ".errors",
+    "PublicApiErrorDetails": ".errors",
+    "PublicApiValidationIssue": ".errors",
+    "PublicBooleanEvaluatorOutputDefinition": ".commons",
+    "PublicCategoricalEvaluatorOutputDefinition": ".commons",
+    "PublicCategoricalEvaluatorOutputScoreDefinition": ".commons",
+    "PublicEvaluatorOutputDefinition": ".commons",
+    "PublicEvaluatorOutputDefinition_Boolean": ".commons",
+    "PublicEvaluatorOutputDefinition_Categorical": ".commons",
+    "PublicEvaluatorOutputDefinition_Numeric": ".commons",
+    "PublicNumericEvaluatorOutputDefinition": ".commons",
+    "StringEvaluationRuleFilter": ".commons",
+    "StringObjectEvaluationRuleFilter": ".commons",
+    "StringOptionsEvaluationRuleFilter": ".commons",
+    "TooManyRequestsError": ".errors",
+    "UnauthorizedError": ".errors",
+    "UnprocessableContentError": ".errors",
+    "UpdateEvaluationRuleRequest": ".evaluation_rules",
+    "commons": ".commons",
+    "errors": ".errors",
+    "evaluation_rules": ".evaluation_rules",
+    "evaluators": ".evaluators",
+}
+
+
+def __getattr__(attr_name: str) -> typing.Any:
+    """Resolve ``attr_name`` lazily through its ``_dynamic_imports`` entry."""
+    module_name = _dynamic_imports.get(attr_name)
+    if module_name is None:
+        raise AttributeError(
+            f"No {attr_name} found in _dynamic_imports for module name -> {__name__}"
+        )
+    # Entries such as "errors" -> ".errors" export the submodule itself, not a member.
+    wants_module = module_name == f".{attr_name}"
+    try:
+        module = import_module(module_name, __package__)
+        return module if wants_module else getattr(module, attr_name)
+    except ImportError as e:
+        raise ImportError(
+            f"Failed to import {attr_name} from {module_name}: {e}"
+        ) from e
+    except AttributeError as e:
+        raise AttributeError(
+            f"Failed to get {attr_name} from {module_name}: {e}"
+        ) from e
+
+
+def __dir__() -> typing.List[str]:
+    """Return the lazily exported attribute names in sorted order."""
+    return sorted(_dynamic_imports)
+
+
+__all__ = [
+    "AccessDeniedError",
+    "ArrayOptionsEvaluationRuleFilter",
+    "BadRequestError",
+    "BooleanEvaluationRuleFilter",
+    "CategoryOptionsEvaluationRuleFilter",
+    "ConflictError",
+    "CreateEvaluationRuleRequest",
+    "CreateEvaluatorRequest",
+    "DateTimeEvaluationRuleFilter",
+    "DeleteEvaluationRuleResponse",
+    "EvaluationRule",
+    "EvaluationRuleArrayOptionsFilterOperator",
+    "EvaluationRuleBooleanFilterOperator",
+    "EvaluationRuleEvaluator",
+    "EvaluationRuleEvaluatorReference",
+    "EvaluationRuleFilter",
+    "EvaluationRuleFilter_ArrayOptions",
+    "EvaluationRuleFilter_Boolean",
+    "EvaluationRuleFilter_CategoryOptions",
+    "EvaluationRuleFilter_Datetime",
+    "EvaluationRuleFilter_Null",
+    "EvaluationRuleFilter_Number",
+    "EvaluationRuleFilter_NumberObject",
+    "EvaluationRuleFilter_String",
+    "EvaluationRuleFilter_StringObject",
+    "EvaluationRuleFilter_StringOptions",
+    "EvaluationRuleMapping",
+    "EvaluationRuleMappingSource",
+    "EvaluationRuleNullFilterOperator",
+    "EvaluationRuleNumberFilterOperator",
+    "EvaluationRuleOptionsFilterOperator",
+    "EvaluationRuleStatus",
+    "EvaluationRuleStringFilterOperator",
+    "EvaluationRuleTarget",
+    "EvaluationRules",
+    "Evaluator",
+    "EvaluatorModelConfig",
+    "EvaluatorOutputDataType",
+    "EvaluatorOutputDefinition",
+    "EvaluatorOutputDefinition_Boolean",
+    "EvaluatorOutputDefinition_Categorical",
+    "EvaluatorOutputDefinition_Numeric",
+    "EvaluatorOutputFieldDefinition",
+    "EvaluatorScope",
+    "EvaluatorType",
+    "Evaluators",
+    "InternalServerError",
+    "MethodNotAllowedError",
+    "NotFoundError",
+    "NullEvaluationRuleFilter",
+    "NumberEvaluationRuleFilter",
+    "NumberObjectEvaluationRuleFilter",
+    "PublicApiError",
+    "PublicApiErrorCode",
+    "PublicApiErrorDetails",
+    "PublicApiValidationIssue",
+    "PublicBooleanEvaluatorOutputDefinition",
+    "PublicCategoricalEvaluatorOutputDefinition",
+    "PublicCategoricalEvaluatorOutputScoreDefinition",
+    "PublicEvaluatorOutputDefinition",
+    "PublicEvaluatorOutputDefinition_Boolean",
+    "PublicEvaluatorOutputDefinition_Categorical",
+    "PublicEvaluatorOutputDefinition_Numeric",
+    "PublicNumericEvaluatorOutputDefinition",
+    "StringEvaluationRuleFilter",
+    "StringObjectEvaluationRuleFilter",
+    "StringOptionsEvaluationRuleFilter",
+    "TooManyRequestsError",
+    "UnauthorizedError",
+    "UnprocessableContentError",
+    "UpdateEvaluationRuleRequest",
+    "commons",
+    "errors",
+    "evaluation_rules",
+    "evaluators",
+]
diff --git a/langfuse/api/unstable/client.py b/langfuse/api/unstable/client.py
new file mode 100644
index 000000000..5c3ac32d7
--- /dev/null
+++ b/langfuse/api/unstable/client.py
@@ -0,0 +1,91 @@
+# This file was auto-generated by Fern from our API Definition.
+
+from __future__ import annotations
+
+import typing
+
+from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
+from .raw_client import AsyncRawUnstableClient, RawUnstableClient
+
+if typing.TYPE_CHECKING:
+    from .evaluation_rules.client import (
+        AsyncEvaluationRulesClient,
+        EvaluationRulesClient,
+    )
+    from .evaluators.client import AsyncEvaluatorsClient, EvaluatorsClient
+
+
+class UnstableClient:
+    def __init__(self, *, client_wrapper: SyncClientWrapper):
+        self._raw_client = RawUnstableClient(client_wrapper=client_wrapper)
+        self._client_wrapper = client_wrapper
+        self._evaluation_rules: typing.Optional[EvaluationRulesClient] = None  # lazy
+        self._evaluators: typing.Optional[EvaluatorsClient] = None  # lazy
+
+    @property
+    def with_raw_response(self) -> RawUnstableClient:
+        """
+        Retrieves a raw implementation of this client that returns raw responses.
+
+        Returns
+        -------
+        RawUnstableClient
+        """
+        return self._raw_client
+
+    @property
+    def evaluation_rules(self) -> EvaluationRulesClient:
+        if self._evaluation_rules is None:  # first access: import and build, then reuse
+            from .evaluation_rules.client import EvaluationRulesClient  # noqa: E402
+
+            self._evaluation_rules = EvaluationRulesClient(
+                client_wrapper=self._client_wrapper
+            )
+        return self._evaluation_rules
+
+    @property
+    def evaluators(self) -> EvaluatorsClient:
+        if self._evaluators is None:  # first access: import and build, then reuse
+            from .evaluators.client import EvaluatorsClient  # noqa: E402
+
+            self._evaluators = EvaluatorsClient(client_wrapper=self._client_wrapper)
+        return self._evaluators
+
+
+class AsyncUnstableClient:
+    def __init__(self, *, client_wrapper: AsyncClientWrapper):
+        self._raw_client = AsyncRawUnstableClient(client_wrapper=client_wrapper)
+        self._client_wrapper = client_wrapper
+        self._evaluation_rules: typing.Optional[AsyncEvaluationRulesClient] = None  # lazy
+        self._evaluators: typing.Optional[AsyncEvaluatorsClient] = None  # lazy
+
+    @property
+    def with_raw_response(self) -> AsyncRawUnstableClient:
+        """
+        Retrieves a raw implementation of this client that returns raw responses.
+
+        Returns
+        -------
+        AsyncRawUnstableClient
+        """
+        return self._raw_client
+
+    @property
+    def evaluation_rules(self) -> AsyncEvaluationRulesClient:
+        if self._evaluation_rules is None:  # first access: import and build, then reuse
+            from .evaluation_rules.client import AsyncEvaluationRulesClient  # noqa: E402
+
+            self._evaluation_rules = AsyncEvaluationRulesClient(
+                client_wrapper=self._client_wrapper
+            )
+        return self._evaluation_rules
+
+    @property
+    def evaluators(self) -> AsyncEvaluatorsClient:
+        if self._evaluators is None:  # first access: import and build, then reuse
+            from .evaluators.client import AsyncEvaluatorsClient  # noqa: E402
+
+            self._evaluators = AsyncEvaluatorsClient(
+                client_wrapper=self._client_wrapper
+            )
+        return self._evaluators
diff --git a/langfuse/api/unstable/commons/__init__.py b/langfuse/api/unstable/commons/__init__.py
new file mode 100644
index 000000000..13d9571ff
--- /dev/null
+++ b/langfuse/api/unstable/commons/__init__.py
@@ -0,0 +1,187 @@
+# This file was auto-generated by Fern from our API Definition.
+
+# isort: skip_file
+
+import typing
+from importlib import import_module
+
+if typing.TYPE_CHECKING:
+    from .types import (
+        ArrayOptionsEvaluationRuleFilter,
+        BooleanEvaluationRuleFilter,
+        CategoryOptionsEvaluationRuleFilter,
+        DateTimeEvaluationRuleFilter,
+        EvaluationRuleArrayOptionsFilterOperator,
+        EvaluationRuleBooleanFilterOperator,
+        EvaluationRuleFilter,
+        EvaluationRuleFilter_ArrayOptions,
+        EvaluationRuleFilter_Boolean,
+        EvaluationRuleFilter_CategoryOptions,
+        EvaluationRuleFilter_Datetime,
+        EvaluationRuleFilter_Null,
+        EvaluationRuleFilter_Number,
+        EvaluationRuleFilter_NumberObject,
+        EvaluationRuleFilter_String,
+        EvaluationRuleFilter_StringObject,
+        EvaluationRuleFilter_StringOptions,
+        EvaluationRuleMapping,
+        EvaluationRuleMappingSource,
+        EvaluationRuleNullFilterOperator,
+        EvaluationRuleNumberFilterOperator,
+        EvaluationRuleOptionsFilterOperator,
+        EvaluationRuleStatus,
+        EvaluationRuleStringFilterOperator,
+        EvaluationRuleTarget,
+        EvaluatorModelConfig,
+        EvaluatorOutputDataType,
+        EvaluatorOutputDefinition,
+        EvaluatorOutputDefinition_Boolean,
+        EvaluatorOutputDefinition_Categorical,
+        EvaluatorOutputDefinition_Numeric,
+        EvaluatorOutputFieldDefinition,
+        EvaluatorScope,
+        EvaluatorType,
+        NullEvaluationRuleFilter,
+        NumberEvaluationRuleFilter,
+        NumberObjectEvaluationRuleFilter,
+        PublicBooleanEvaluatorOutputDefinition,
+        PublicCategoricalEvaluatorOutputDefinition,
+        PublicCategoricalEvaluatorOutputScoreDefinition,
+        PublicEvaluatorOutputDefinition,
+        PublicEvaluatorOutputDefinition_Boolean,
+        PublicEvaluatorOutputDefinition_Categorical,
+        PublicEvaluatorOutputDefinition_Numeric,
+        PublicNumericEvaluatorOutputDefinition,
+        StringEvaluationRuleFilter,
+        StringObjectEvaluationRuleFilter,
+        StringOptionsEvaluationRuleFilter,
+    )
+_dynamic_imports: typing.Dict[str, str] = {
+    "ArrayOptionsEvaluationRuleFilter": ".types",
+    "BooleanEvaluationRuleFilter": ".types",
+    "CategoryOptionsEvaluationRuleFilter": ".types",
+    "DateTimeEvaluationRuleFilter": ".types",
+    "EvaluationRuleArrayOptionsFilterOperator": ".types",
+    "EvaluationRuleBooleanFilterOperator": ".types",
+    "EvaluationRuleFilter": ".types",
+    "EvaluationRuleFilter_ArrayOptions": ".types",
+    "EvaluationRuleFilter_Boolean": ".types",
+    "EvaluationRuleFilter_CategoryOptions": ".types",
+    "EvaluationRuleFilter_Datetime": ".types",
+    "EvaluationRuleFilter_Null": ".types",
+    "EvaluationRuleFilter_Number": ".types",
+    "EvaluationRuleFilter_NumberObject": ".types",
+    "EvaluationRuleFilter_String": ".types",
+    "EvaluationRuleFilter_StringObject": ".types",
+    "EvaluationRuleFilter_StringOptions": ".types",
+    "EvaluationRuleMapping": ".types",
+    "EvaluationRuleMappingSource": ".types",
+    "EvaluationRuleNullFilterOperator": ".types",
+    "EvaluationRuleNumberFilterOperator": ".types",
+    "EvaluationRuleOptionsFilterOperator": ".types",
+    "EvaluationRuleStatus": ".types",
+    "EvaluationRuleStringFilterOperator": ".types",
+    "EvaluationRuleTarget": ".types",
+    "EvaluatorModelConfig": ".types",
+    "EvaluatorOutputDataType": ".types",
+    "EvaluatorOutputDefinition": ".types",
+    "EvaluatorOutputDefinition_Boolean": ".types",
+    "EvaluatorOutputDefinition_Categorical": ".types",
+    "EvaluatorOutputDefinition_Numeric": ".types",
+    "EvaluatorOutputFieldDefinition": ".types",
+    "EvaluatorScope": ".types",
+    "EvaluatorType": ".types",
+    "NullEvaluationRuleFilter": ".types",
+    "NumberEvaluationRuleFilter": ".types",
+    "NumberObjectEvaluationRuleFilter": ".types",
+    "PublicBooleanEvaluatorOutputDefinition": ".types",
+    "PublicCategoricalEvaluatorOutputDefinition": ".types",
+    "PublicCategoricalEvaluatorOutputScoreDefinition": ".types",
+    "PublicEvaluatorOutputDefinition": ".types",
+    "PublicEvaluatorOutputDefinition_Boolean": ".types",
+    "PublicEvaluatorOutputDefinition_Categorical": ".types",
+    "PublicEvaluatorOutputDefinition_Numeric": ".types",
+    "PublicNumericEvaluatorOutputDefinition": ".types",
+    "StringEvaluationRuleFilter": ".types",
+    "StringObjectEvaluationRuleFilter": ".types",
+    "StringOptionsEvaluationRuleFilter": ".types",
+}
+
+
+def __getattr__(attr_name: str) -> typing.Any:
+    """Resolve ``attr_name`` lazily through its ``_dynamic_imports`` entry."""
+    module_name = _dynamic_imports.get(attr_name)
+    if module_name is None:
+        raise AttributeError(
+            f"No {attr_name} found in _dynamic_imports for module name -> {__name__}"
+        )
+    # A name mapped to a same-named submodule exports the module itself, not a member.
+    wants_module = module_name == f".{attr_name}"
+    try:
+        module = import_module(module_name, __package__)
+        return module if wants_module else getattr(module, attr_name)
+    except ImportError as e:
+        raise ImportError(
+            f"Failed to import {attr_name} from {module_name}: {e}"
+        ) from e
+    except AttributeError as e:
+        raise AttributeError(
+            f"Failed to get {attr_name} from {module_name}: {e}"
+        ) from e
+
+
+def __dir__() -> typing.List[str]:
+    """Return the lazily exported attribute names in sorted order."""
+    return sorted(_dynamic_imports)
+
+
+__all__ = [
+    "ArrayOptionsEvaluationRuleFilter",
+    "BooleanEvaluationRuleFilter",
+    "CategoryOptionsEvaluationRuleFilter",
+    "DateTimeEvaluationRuleFilter",
+    "EvaluationRuleArrayOptionsFilterOperator",
+    "EvaluationRuleBooleanFilterOperator",
+    "EvaluationRuleFilter",
+    "EvaluationRuleFilter_ArrayOptions",
+    "EvaluationRuleFilter_Boolean",
+    "EvaluationRuleFilter_CategoryOptions",
+    "EvaluationRuleFilter_Datetime",
+    "EvaluationRuleFilter_Null",
+    "EvaluationRuleFilter_Number",
+    "EvaluationRuleFilter_NumberObject",
+    "EvaluationRuleFilter_String",
+    "EvaluationRuleFilter_StringObject",
+    "EvaluationRuleFilter_StringOptions",
+    "EvaluationRuleMapping",
+    "EvaluationRuleMappingSource",
+    "EvaluationRuleNullFilterOperator",
+    "EvaluationRuleNumberFilterOperator",
+    "EvaluationRuleOptionsFilterOperator",
+    "EvaluationRuleStatus",
+    "EvaluationRuleStringFilterOperator",
+    "EvaluationRuleTarget",
+    "EvaluatorModelConfig",
+    "EvaluatorOutputDataType",
+    "EvaluatorOutputDefinition",
+    "EvaluatorOutputDefinition_Boolean",
+    "EvaluatorOutputDefinition_Categorical",
+    "EvaluatorOutputDefinition_Numeric",
+    "EvaluatorOutputFieldDefinition",
+    "EvaluatorScope",
+    "EvaluatorType",
+    "NullEvaluationRuleFilter",
+    "NumberEvaluationRuleFilter",
+    "NumberObjectEvaluationRuleFilter",
+    "PublicBooleanEvaluatorOutputDefinition",
+    "PublicCategoricalEvaluatorOutputDefinition",
+    "PublicCategoricalEvaluatorOutputScoreDefinition",
+    "PublicEvaluatorOutputDefinition",
+    "PublicEvaluatorOutputDefinition_Boolean",
+    "PublicEvaluatorOutputDefinition_Categorical",
+    "PublicEvaluatorOutputDefinition_Numeric",
+    "PublicNumericEvaluatorOutputDefinition",
+    "StringEvaluationRuleFilter",
+    "StringObjectEvaluationRuleFilter",
+    "StringOptionsEvaluationRuleFilter",
+]
diff --git a/langfuse/api/unstable/commons/types/__init__.py b/langfuse/api/unstable/commons/types/__init__.py
new file mode 100644
index 000000000..a0e7d9f9d
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/__init__.py
@@ -0,0 +1,211 @@
+# This file was auto-generated by Fern from our API Definition.
+
+# isort: skip_file
+
+import typing
+from importlib import import_module
+
+if typing.TYPE_CHECKING:
+    from .array_options_evaluation_rule_filter import ArrayOptionsEvaluationRuleFilter
+    from .boolean_evaluation_rule_filter import BooleanEvaluationRuleFilter
+    from .category_options_evaluation_rule_filter import (
+        CategoryOptionsEvaluationRuleFilter,
+    )
+    from .date_time_evaluation_rule_filter import DateTimeEvaluationRuleFilter
+    from .evaluation_rule_array_options_filter_operator import (
+        EvaluationRuleArrayOptionsFilterOperator,
+    )
+    from .evaluation_rule_boolean_filter_operator import (
+        EvaluationRuleBooleanFilterOperator,
+    )
+    from .evaluation_rule_filter import (
+        EvaluationRuleFilter,
+        EvaluationRuleFilter_ArrayOptions,
+        EvaluationRuleFilter_Boolean,
+        EvaluationRuleFilter_CategoryOptions,
+        EvaluationRuleFilter_Datetime,
+        EvaluationRuleFilter_Null,
+        EvaluationRuleFilter_Number,
+        EvaluationRuleFilter_NumberObject,
+        EvaluationRuleFilter_String,
+        EvaluationRuleFilter_StringObject,
+        EvaluationRuleFilter_StringOptions,
+    )
+    from .evaluation_rule_mapping import EvaluationRuleMapping
+    from .evaluation_rule_mapping_source import EvaluationRuleMappingSource
+    from .evaluation_rule_null_filter_operator import EvaluationRuleNullFilterOperator
+    from .evaluation_rule_number_filter_operator import (
+        EvaluationRuleNumberFilterOperator,
+    )
+    from .evaluation_rule_options_filter_operator import (
+        EvaluationRuleOptionsFilterOperator,
+    )
+    from .evaluation_rule_status import EvaluationRuleStatus
+    from .evaluation_rule_string_filter_operator import (
+        EvaluationRuleStringFilterOperator,
+    )
+    from .evaluation_rule_target import EvaluationRuleTarget
+    from .evaluator_model_config import EvaluatorModelConfig
+    from .evaluator_output_data_type import EvaluatorOutputDataType
+    from .evaluator_output_definition import (
+        EvaluatorOutputDefinition,
+        EvaluatorOutputDefinition_Boolean,
+        EvaluatorOutputDefinition_Categorical,
+        EvaluatorOutputDefinition_Numeric,
+    )
+    from .evaluator_output_field_definition import EvaluatorOutputFieldDefinition
+    from .evaluator_scope import EvaluatorScope
+    from .evaluator_type import EvaluatorType
+    from .null_evaluation_rule_filter import NullEvaluationRuleFilter
+    from .number_evaluation_rule_filter import NumberEvaluationRuleFilter
+    from .number_object_evaluation_rule_filter import NumberObjectEvaluationRuleFilter
+    from .public_boolean_evaluator_output_definition import (
+        PublicBooleanEvaluatorOutputDefinition,
+    )
+    from .public_categorical_evaluator_output_definition import (
+        PublicCategoricalEvaluatorOutputDefinition,
+    )
+    from .public_categorical_evaluator_output_score_definition import (
+        PublicCategoricalEvaluatorOutputScoreDefinition,
+    )
+    from .public_evaluator_output_definition import (
+        PublicEvaluatorOutputDefinition,
+        PublicEvaluatorOutputDefinition_Boolean,
+        PublicEvaluatorOutputDefinition_Categorical,
+        PublicEvaluatorOutputDefinition_Numeric,
+    )
+    from .public_numeric_evaluator_output_definition import (
+        PublicNumericEvaluatorOutputDefinition,
+    )
+    from .string_evaluation_rule_filter import StringEvaluationRuleFilter
+    from .string_object_evaluation_rule_filter import StringObjectEvaluationRuleFilter
+    from .string_options_evaluation_rule_filter import StringOptionsEvaluationRuleFilter
+_dynamic_imports: typing.Dict[str, str] = {  # exported name -> relative submodule that __getattr__ imports on first access
+    "ArrayOptionsEvaluationRuleFilter": ".array_options_evaluation_rule_filter",
+    "BooleanEvaluationRuleFilter": ".boolean_evaluation_rule_filter",
+    "CategoryOptionsEvaluationRuleFilter": ".category_options_evaluation_rule_filter",
+    "DateTimeEvaluationRuleFilter": ".date_time_evaluation_rule_filter",
+    "EvaluationRuleArrayOptionsFilterOperator": ".evaluation_rule_array_options_filter_operator",
+    "EvaluationRuleBooleanFilterOperator": ".evaluation_rule_boolean_filter_operator",
+    "EvaluationRuleFilter": ".evaluation_rule_filter",
+    "EvaluationRuleFilter_ArrayOptions": ".evaluation_rule_filter",
+    "EvaluationRuleFilter_Boolean": ".evaluation_rule_filter",
+    "EvaluationRuleFilter_CategoryOptions": ".evaluation_rule_filter",
+    "EvaluationRuleFilter_Datetime": ".evaluation_rule_filter",
+    "EvaluationRuleFilter_Null": ".evaluation_rule_filter",
+    "EvaluationRuleFilter_Number": ".evaluation_rule_filter",
+    "EvaluationRuleFilter_NumberObject": ".evaluation_rule_filter",
+    "EvaluationRuleFilter_String": ".evaluation_rule_filter",
+    "EvaluationRuleFilter_StringObject": ".evaluation_rule_filter",
+    "EvaluationRuleFilter_StringOptions": ".evaluation_rule_filter",
+    "EvaluationRuleMapping": ".evaluation_rule_mapping",
+    "EvaluationRuleMappingSource": ".evaluation_rule_mapping_source",
+    "EvaluationRuleNullFilterOperator": ".evaluation_rule_null_filter_operator",
+    "EvaluationRuleNumberFilterOperator": ".evaluation_rule_number_filter_operator",
+    "EvaluationRuleOptionsFilterOperator": ".evaluation_rule_options_filter_operator",
+    "EvaluationRuleStatus": ".evaluation_rule_status",
+    "EvaluationRuleStringFilterOperator": ".evaluation_rule_string_filter_operator",
+    "EvaluationRuleTarget": ".evaluation_rule_target",
+    "EvaluatorModelConfig": ".evaluator_model_config",
+    "EvaluatorOutputDataType": ".evaluator_output_data_type",
+    "EvaluatorOutputDefinition": ".evaluator_output_definition",
+    "EvaluatorOutputDefinition_Boolean": ".evaluator_output_definition",
+    "EvaluatorOutputDefinition_Categorical": ".evaluator_output_definition",
+    "EvaluatorOutputDefinition_Numeric": ".evaluator_output_definition",
+    "EvaluatorOutputFieldDefinition": ".evaluator_output_field_definition",
+    "EvaluatorScope": ".evaluator_scope",
+    "EvaluatorType": ".evaluator_type",
+    "NullEvaluationRuleFilter": ".null_evaluation_rule_filter",
+    "NumberEvaluationRuleFilter": ".number_evaluation_rule_filter",
+    "NumberObjectEvaluationRuleFilter": ".number_object_evaluation_rule_filter",
+    "PublicBooleanEvaluatorOutputDefinition": ".public_boolean_evaluator_output_definition",
+    "PublicCategoricalEvaluatorOutputDefinition": ".public_categorical_evaluator_output_definition",
+    "PublicCategoricalEvaluatorOutputScoreDefinition": ".public_categorical_evaluator_output_score_definition",
+    "PublicEvaluatorOutputDefinition": ".public_evaluator_output_definition",
+    "PublicEvaluatorOutputDefinition_Boolean": ".public_evaluator_output_definition",
+    "PublicEvaluatorOutputDefinition_Categorical": ".public_evaluator_output_definition",
+    "PublicEvaluatorOutputDefinition_Numeric": ".public_evaluator_output_definition",
+    "PublicNumericEvaluatorOutputDefinition": ".public_numeric_evaluator_output_definition",
+    "StringEvaluationRuleFilter": ".string_evaluation_rule_filter",
+    "StringObjectEvaluationRuleFilter": ".string_object_evaluation_rule_filter",
+    "StringOptionsEvaluationRuleFilter": ".string_options_evaluation_rule_filter",
+}
+
+
+def __getattr__(attr_name: str) -> typing.Any:  # module-level attribute hook (PEP 562): resolves exported names lazily
+    module_name = _dynamic_imports.get(attr_name)
+    if module_name is None:  # name is not listed in the lazy-export table
+        raise AttributeError(
+            f"No {attr_name} found in _dynamic_imports for module name -> {__name__}"
+        )
+    try:
+        module = import_module(module_name, __package__)  # relative import, anchored at this package
+        if module_name == f".{attr_name}":  # the requested name is the submodule itself
+            return module
+        else:
+            return getattr(module, attr_name)
+    except ImportError as e:  # re-raised with the failing name/module; original error kept as the cause
+        raise ImportError(
+            f"Failed to import {attr_name} from {module_name}: {e}"
+        ) from e
+    except AttributeError as e:  # submodule imported but does not define the requested name
+        raise AttributeError(
+            f"Failed to get {attr_name} from {module_name}: {e}"
+        ) from e
+
+
+def __dir__():  # dir() on this package reports the lazily-exported names, sorted alphabetically
+    lazy_attrs = list(_dynamic_imports.keys())
+    return sorted(lazy_attrs)
+
+
+__all__ = [  # public names of this package; same set as the keys of _dynamic_imports
+    "ArrayOptionsEvaluationRuleFilter",
+    "BooleanEvaluationRuleFilter",
+    "CategoryOptionsEvaluationRuleFilter",
+    "DateTimeEvaluationRuleFilter",
+    "EvaluationRuleArrayOptionsFilterOperator",
+    "EvaluationRuleBooleanFilterOperator",
+    "EvaluationRuleFilter",
+    "EvaluationRuleFilter_ArrayOptions",
+    "EvaluationRuleFilter_Boolean",
+    "EvaluationRuleFilter_CategoryOptions",
+    "EvaluationRuleFilter_Datetime",
+    "EvaluationRuleFilter_Null",
+    "EvaluationRuleFilter_Number",
+    "EvaluationRuleFilter_NumberObject",
+    "EvaluationRuleFilter_String",
+    "EvaluationRuleFilter_StringObject",
+    "EvaluationRuleFilter_StringOptions",
+    "EvaluationRuleMapping",
+    "EvaluationRuleMappingSource",
+    "EvaluationRuleNullFilterOperator",
+    "EvaluationRuleNumberFilterOperator",
+    "EvaluationRuleOptionsFilterOperator",
+    "EvaluationRuleStatus",
+    "EvaluationRuleStringFilterOperator",
+    "EvaluationRuleTarget",
+    "EvaluatorModelConfig",
+    "EvaluatorOutputDataType",
+    "EvaluatorOutputDefinition",
+    "EvaluatorOutputDefinition_Boolean",
+    "EvaluatorOutputDefinition_Categorical",
+    "EvaluatorOutputDefinition_Numeric",
+    "EvaluatorOutputFieldDefinition",
+    "EvaluatorScope",
+    "EvaluatorType",
+    "NullEvaluationRuleFilter",
+    "NumberEvaluationRuleFilter",
+    "NumberObjectEvaluationRuleFilter",
+    "PublicBooleanEvaluatorOutputDefinition",
+    "PublicCategoricalEvaluatorOutputDefinition",
+    "PublicCategoricalEvaluatorOutputScoreDefinition",
+    "PublicEvaluatorOutputDefinition",
+    "PublicEvaluatorOutputDefinition_Boolean",
+    "PublicEvaluatorOutputDefinition_Categorical",
+    "PublicEvaluatorOutputDefinition_Numeric",
+    "PublicNumericEvaluatorOutputDefinition",
+    "StringEvaluationRuleFilter",
+    "StringObjectEvaluationRuleFilter",
+    "StringOptionsEvaluationRuleFilter",
+]
diff --git a/langfuse/api/unstable/commons/types/array_options_evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/array_options_evaluation_rule_filter.py
new file mode 100644
index 000000000..c89ce8b16
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/array_options_evaluation_rule_filter.py
@@ -0,0 +1,26 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+from .evaluation_rule_array_options_filter_operator import (
+    EvaluationRuleArrayOptionsFilterOperator,
+)
+
+
+class ArrayOptionsEvaluationRuleFilter(UniversalBaseModel):  # filter comparing a column against a list of array elements
+    column: str = pydantic.Field()
+    """
+    Column to filter on.
+    """
+
+    operator: EvaluationRuleArrayOptionsFilterOperator  # how `value` is matched; see the operator enum for allowed members
+    value: typing.List[str] = pydantic.Field()
+    """
+    One or more array elements to match.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )  # pydantic config: extra="allow" keeps unknown fields; frozen=True makes instances immutable
diff --git a/langfuse/api/unstable/commons/types/boolean_evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/boolean_evaluation_rule_filter.py
new file mode 100644
index 000000000..666b691bb
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/boolean_evaluation_rule_filter.py
@@ -0,0 +1,21 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+from .evaluation_rule_boolean_filter_operator import EvaluationRuleBooleanFilterOperator
+
+
+class BooleanEvaluationRuleFilter(UniversalBaseModel):  # filter comparing a column against a boolean value
+    column: str = pydantic.Field()
+    """
+    Column to filter on.
+    """
+
+    operator: EvaluationRuleBooleanFilterOperator  # comparison applied between the column and `value`
+    value: bool  # boolean the column is compared against
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )  # pydantic config: extra="allow" keeps unknown fields; frozen=True makes instances immutable
diff --git a/langfuse/api/unstable/commons/types/category_options_evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/category_options_evaluation_rule_filter.py
new file mode 100644
index 000000000..97f13ae62
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/category_options_evaluation_rule_filter.py
@@ -0,0 +1,26 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+from .evaluation_rule_options_filter_operator import EvaluationRuleOptionsFilterOperator
+
+
+class CategoryOptionsEvaluationRuleFilter(UniversalBaseModel):  # options filter on one key of an object-valued column
+    column: str = pydantic.Field()
+    """
+    Object-valued column to filter on.
+    """
+
+    key: str = pydantic.Field()
+    """
+    Key inside the object-valued column to filter on.
+    """
+
+    operator: EvaluationRuleOptionsFilterOperator  # how the listed options are matched (enum defined in a sibling module)
+    value: typing.List[str]  # option strings to match against
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )  # pydantic config: extra="allow" keeps unknown fields; frozen=True makes instances immutable
diff --git a/langfuse/api/unstable/commons/types/date_time_evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/date_time_evaluation_rule_filter.py
new file mode 100644
index 000000000..9ee23b1fe
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/date_time_evaluation_rule_filter.py
@@ -0,0 +1,29 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+from .evaluation_rule_number_filter_operator import EvaluationRuleNumberFilterOperator
+
+
+class DateTimeEvaluationRuleFilter(UniversalBaseModel):  # filter comparing a column against a datetime value
+    column: str = pydantic.Field()
+    """
+    Column to filter on.
+    """
+
+    operator: EvaluationRuleNumberFilterOperator = pydantic.Field()  # note: reuses the number operator enum for datetimes
+    """
+    Comparison operator for datetime values.
+    """
+
+    value: dt.datetime = pydantic.Field()
+    """
+    Datetime value to compare against.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )  # pydantic config: extra="allow" keeps unknown fields; frozen=True makes instances immutable
diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_array_options_filter_operator.py b/langfuse/api/unstable/commons/types/evaluation_rule_array_options_filter_operator.py
new file mode 100644
index 000000000..ba8f49a13
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/evaluation_rule_array_options_filter_operator.py
@@ -0,0 +1,26 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core import enum
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class EvaluationRuleArrayOptionsFilterOperator(enum.StrEnum):  # operators for array-options filters; values are the wire strings
+    ANY_OF = "any of"
+    NONE_OF = "none of"
+    ALL_OF = "all of"
+
+    def visit(  # visitor-style dispatch: calls the callback matching this member and returns its result
+        self,
+        any_of: typing.Callable[[], T_Result],
+        none_of: typing.Callable[[], T_Result],
+        all_of: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is EvaluationRuleArrayOptionsFilterOperator.ANY_OF:
+            return any_of()
+        if self is EvaluationRuleArrayOptionsFilterOperator.NONE_OF:
+            return none_of()
+        if self is EvaluationRuleArrayOptionsFilterOperator.ALL_OF:
+            return all_of()  # no fallback branch follows: falls through to an implicit None if no member matched
diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_boolean_filter_operator.py b/langfuse/api/unstable/commons/types/evaluation_rule_boolean_filter_operator.py
new file mode 100644
index 000000000..737d6063a
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/evaluation_rule_boolean_filter_operator.py
@@ -0,0 +1,22 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core import enum
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class EvaluationRuleBooleanFilterOperator(enum.StrEnum):  # operators for boolean filters; values are the wire strings
+    EQUALS = "="
+    NOT_EQUALS = "<>"
+
+    def visit(  # visitor-style dispatch: calls the callback matching this member and returns its result
+        self,
+        equals: typing.Callable[[], T_Result],
+        not_equals: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is EvaluationRuleBooleanFilterOperator.EQUALS:
+            return equals()
+        if self is EvaluationRuleBooleanFilterOperator.NOT_EQUALS:
+            return not_equals()  # no fallback branch follows: falls through to an implicit None if no member matched
diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/evaluation_rule_filter.py
new file mode 100644
index 000000000..ea5e0420b
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/evaluation_rule_filter.py
@@ -0,0 +1,740 @@
+# This file was auto-generated by Fern from our API Definition.
+
+from __future__ import annotations
+
+import datetime as dt
+import typing
+
+import pydantic
+import typing_extensions
+from ....core.pydantic_utilities import UniversalBaseModel
+from .evaluation_rule_array_options_filter_operator import (
+    EvaluationRuleArrayOptionsFilterOperator,
+)
+from .evaluation_rule_boolean_filter_operator import EvaluationRuleBooleanFilterOperator
+from .evaluation_rule_null_filter_operator import EvaluationRuleNullFilterOperator
+from .evaluation_rule_number_filter_operator import EvaluationRuleNumberFilterOperator
+from .evaluation_rule_options_filter_operator import EvaluationRuleOptionsFilterOperator
+from .evaluation_rule_string_filter_operator import EvaluationRuleStringFilterOperator
+
+
+class EvaluationRuleFilter_Datetime(UniversalBaseModel):
+    """
+    One filter condition used to decide whether a live-ingested target should be evaluated.
+
+    An evaluation rule can include zero or more filter objects. All filters must be satisfied for the target to run.
+
+    How to build a valid filter object:
+    - Pick the `target` first, because it changes the supported columns.
+    - Pick the filter `type`. That determines which fields are required.
+    - Use `key` only for object filters such as `metadata`.
+    - Use the correct `value` shape for the chosen filter `type`.
+
+    Operator quick reference by filter `type`:
+    - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with`
+    - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `stringOptions`: `any of`, `none of`
+    - `arrayOptions`: `any of`, `none of`, `all of`
+    - `stringObject`: same operators as `string`
+    - `null`: `is null`, `is not null`
+
+    Supported columns by target:
+    - `target=observation`
+      - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT`
+      - `name`: `stringOptions`, operators `any of` / `none of`
+      - `environment`: `stringOptions`, operators `any of` / `none of`
+      - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR`
+      - `version`: `string`
+      - `traceName`: `stringOptions`, operators `any of` / `none of`
+      - `userId`: `string`
+      - `sessionId`: `string`
+      - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `metadata`: `stringObject` with `key`
+      - `parentObservationId`: `null`, operators `is null` / `is not null`
+      - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `toolCalls`: `number`
+    - `target=experiment`
+      - `datasetId`: `stringOptions`, operators `any of` / `none of`
+        Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+    Recovery guidance:
+    - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target`
+    - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided.
+    - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response.
+
+    Examples
+    --------
+    from langfuse.api.unstable.commons.types import (
+        EvaluationRuleFilter_StringOptions,
+        EvaluationRuleOptionsFilterOperator,
+    )
+
+    EvaluationRuleFilter_StringOptions(
+        column="type",
+        operator=EvaluationRuleOptionsFilterOperator.ANY_OF,
+        value=["GENERATION"],
+    )
+    """
+
+    type: typing.Literal["datetime"] = "datetime"  # literal tag fixed to "datetime" for this variant
+    column: str
+    operator: EvaluationRuleNumberFilterOperator  # datetime comparisons reuse the number operator enum
+    value: dt.datetime
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )  # pydantic config: extra="allow" keeps unknown fields; frozen=True makes instances immutable
+
+
+class EvaluationRuleFilter_String(UniversalBaseModel):
+    """
+    One filter condition used to decide whether a live-ingested target should be evaluated.
+
+    An evaluation rule can include zero or more filter objects. All filters must be satisfied for the target to run.
+
+    How to build a valid filter object:
+    - Pick the `target` first, because it changes the supported columns.
+    - Pick the filter `type`. That determines which fields are required.
+    - Use `key` only for object filters such as `metadata`.
+    - Use the correct `value` shape for the chosen filter `type`.
+
+    Operator quick reference by filter `type`:
+    - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with`
+    - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `stringOptions`: `any of`, `none of`
+    - `arrayOptions`: `any of`, `none of`, `all of`
+    - `stringObject`: same operators as `string`
+    - `null`: `is null`, `is not null`
+
+    Supported columns by target:
+    - `target=observation`
+      - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT`
+      - `name`: `stringOptions`, operators `any of` / `none of`
+      - `environment`: `stringOptions`, operators `any of` / `none of`
+      - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR`
+      - `version`: `string`
+      - `traceName`: `stringOptions`, operators `any of` / `none of`
+      - `userId`: `string`
+      - `sessionId`: `string`
+      - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `metadata`: `stringObject` with `key`
+      - `parentObservationId`: `null`, operators `is null` / `is not null`
+      - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `toolCalls`: `number`
+    - `target=experiment`
+      - `datasetId`: `stringOptions`, operators `any of` / `none of`
+        Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+    Recovery guidance:
+    - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target`
+    - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided.
+    - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response.
+
+    Examples
+    --------
+    from langfuse.api.unstable.commons.types import (
+        EvaluationRuleFilter_StringOptions,
+        EvaluationRuleOptionsFilterOperator,
+    )
+
+    EvaluationRuleFilter_StringOptions(
+        column="type",
+        operator=EvaluationRuleOptionsFilterOperator.ANY_OF,
+        value=["GENERATION"],
+    )
+    """
+
+    type: typing.Literal["string"] = "string"  # literal tag fixed to "string" for this variant
+    column: str
+    operator: EvaluationRuleStringFilterOperator
+    value: str
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )  # pydantic config: extra="allow" keeps unknown fields; frozen=True makes instances immutable
+
+
+class EvaluationRuleFilter_Number(UniversalBaseModel):
+    """
+    One filter condition used to decide whether a live-ingested target should be evaluated.
+
+    An evaluation rule can include zero or more filter objects. All filters must be satisfied for the target to run.
+
+    How to build a valid filter object:
+    - Pick the `target` first, because it changes the supported columns.
+    - Pick the filter `type`. That determines which fields are required.
+    - Use `key` only for object filters such as `metadata`.
+    - Use the correct `value` shape for the chosen filter `type`.
+
+    Operator quick reference by filter `type`:
+    - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with`
+    - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `stringOptions`: `any of`, `none of`
+    - `arrayOptions`: `any of`, `none of`, `all of`
+    - `stringObject`: same operators as `string`
+    - `null`: `is null`, `is not null`
+
+    Supported columns by target:
+    - `target=observation`
+      - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT`
+      - `name`: `stringOptions`, operators `any of` / `none of`
+      - `environment`: `stringOptions`, operators `any of` / `none of`
+      - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR`
+      - `version`: `string`
+      - `traceName`: `stringOptions`, operators `any of` / `none of`
+      - `userId`: `string`
+      - `sessionId`: `string`
+      - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `metadata`: `stringObject` with `key`
+      - `parentObservationId`: `null`, operators `is null` / `is not null`
+      - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `toolCalls`: `number`
+    - `target=experiment`
+      - `datasetId`: `stringOptions`, operators `any of` / `none of`
+        Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+    Recovery guidance:
+    - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target`
+    - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided.
+    - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response.
+
+    Examples
+    --------
+    from langfuse.api.unstable.commons.types import (
+        EvaluationRuleFilter_StringOptions,
+        EvaluationRuleOptionsFilterOperator,
+    )
+
+    EvaluationRuleFilter_StringOptions(
+        column="type",
+        operator=EvaluationRuleOptionsFilterOperator.ANY_OF,
+        value=["GENERATION"],
+    )
+    """
+
+    type: typing.Literal["number"] = "number"  # literal tag fixed to "number" for this variant
+    column: str
+    operator: EvaluationRuleNumberFilterOperator
+    value: float
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )  # pydantic config: extra="allow" keeps unknown fields; frozen=True makes instances immutable
+
+
+class EvaluationRuleFilter_StringOptions(UniversalBaseModel):
+    """
+    One filter condition used to decide whether a live-ingested target should be evaluated.
+
+    An evaluation rule can include zero or more filter objects. All filters must be satisfied for the target to run.
+
+    How to build a valid filter object:
+    - Pick the `target` first, because it changes the supported columns.
+    - Pick the filter `type`. That determines which fields are required.
+    - Use `key` only for object filters such as `metadata`.
+    - Use the correct `value` shape for the chosen filter `type`.
+
+    Operator quick reference by filter `type`:
+    - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with`
+    - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `stringOptions`: `any of`, `none of`
+    - `arrayOptions`: `any of`, `none of`, `all of`
+    - `stringObject`: same operators as `string`
+    - `null`: `is null`, `is not null`
+
+    Supported columns by target:
+    - `target=observation`
+      - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT`
+      - `name`: `stringOptions`, operators `any of` / `none of`
+      - `environment`: `stringOptions`, operators `any of` / `none of`
+      - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR`
+      - `version`: `string`
+      - `traceName`: `stringOptions`, operators `any of` / `none of`
+      - `userId`: `string`
+      - `sessionId`: `string`
+      - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `metadata`: `stringObject` with `key`
+      - `parentObservationId`: `null`, operators `is null` / `is not null`
+      - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `toolCalls`: `number`
+    - `target=experiment`
+      - `datasetId`: `stringOptions`, operators `any of` / `none of`
+        Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+    Recovery guidance:
+    - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target`
+    - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided.
+    - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response.
+
+    Examples
+    --------
+    from langfuse.api.unstable.commons.types import (
+        EvaluationRuleFilter_StringOptions,
+        EvaluationRuleOptionsFilterOperator,
+    )
+
+    EvaluationRuleFilter_StringOptions(
+        column="type",
+        operator=EvaluationRuleOptionsFilterOperator.ANY_OF,
+        value=["GENERATION"],
+    )
+    """
+
+    type: typing.Literal["stringOptions"] = "stringOptions"  # literal tag fixed to "stringOptions" for this variant
+    column: str
+    operator: EvaluationRuleOptionsFilterOperator
+    value: typing.List[str]
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )  # pydantic config: extra="allow" keeps unknown fields; frozen=True makes instances immutable
+
+
+class EvaluationRuleFilter_CategoryOptions(UniversalBaseModel):
+    """
+    One filter condition used to decide whether a live-ingested target should be evaluated.
+
+    An evaluation rule can include zero or more filter objects. All filters must be satisfied for the target to run.
+
+    How to build a valid filter object:
+    - Pick the `target` first, because it changes the supported columns.
+    - Pick the filter `type`. That determines which fields are required.
+    - Use `key` only for object filters such as `metadata`.
+    - Use the correct `value` shape for the chosen filter `type`.
+
+    Operator quick reference by filter `type`:
+    - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with`
+    - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `stringOptions`: `any of`, `none of`
+    - `arrayOptions`: `any of`, `none of`, `all of`
+    - `stringObject`: same operators as `string`
+    - `null`: `is null`, `is not null`
+
+    Supported columns by target:
+    - `target=observation`
+      - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT`
+      - `name`: `stringOptions`, operators `any of` / `none of`
+      - `environment`: `stringOptions`, operators `any of` / `none of`
+      - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR`
+      - `version`: `string`
+      - `traceName`: `stringOptions`, operators `any of` / `none of`
+      - `userId`: `string`
+      - `sessionId`: `string`
+      - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `metadata`: `stringObject` with `key`
+      - `parentObservationId`: `null`, operators `is null` / `is not null`
+      - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `toolCalls`: `number`
+    - `target=experiment`
+      - `datasetId`: `stringOptions`, operators `any of` / `none of`
+        Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+    Recovery guidance:
+    - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target`
+    - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided.
+    - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response.
+
+    Examples
+    --------
+    from langfuse.api.unstable.commons import (
+        EvaluationRuleFilter_StringOptions,
+        EvaluationRuleOptionsFilterOperator,
+    )
+
+    EvaluationRuleFilter_StringOptions(
+        column="type",
+        operator=EvaluationRuleOptionsFilterOperator.ANY_OF,
+        value=["GENERATION"],
+    )
+    """
+
+    type: typing.Literal["categoryOptions"] = "categoryOptions"
+    column: str
+    key: str
+    operator: EvaluationRuleOptionsFilterOperator
+    value: typing.List[str]
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
+
+
+class EvaluationRuleFilter_ArrayOptions(UniversalBaseModel):
+    """
+    One filter condition used to decide whether a live-ingested target should be evaluated.
+
+    An evaluation rule can include zero or more filter objects. A target is evaluated only when it satisfies all of them.
+
+    How to build a valid filter object:
+    - Pick the `target` first, because it changes the supported columns.
+    - Pick the filter `type`. That determines which fields are required.
+    - Use `key` only for object filters such as `metadata`.
+    - Use the correct `value` shape for the chosen filter `type`.
+
+    Operator quick reference by filter `type`:
+    - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with`
+    - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `stringOptions`: `any of`, `none of`
+    - `arrayOptions`: `any of`, `none of`, `all of`
+    - `stringObject`: same operators as `string`
+    - `null`: `is null`, `is not null`
+
+    Supported columns by target:
+    - `target=observation`
+      - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT`
+      - `name`: `stringOptions`, operators `any of` / `none of`
+      - `environment`: `stringOptions`, operators `any of` / `none of`
+      - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR`
+      - `version`: `string`
+      - `traceName`: `stringOptions`, operators `any of` / `none of`
+      - `userId`: `string`
+      - `sessionId`: `string`
+      - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `metadata`: `stringObject` with `key`
+      - `parentObservationId`: `null`, operators `is null` / `is not null`
+      - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `toolCalls`: `number`
+    - `target=experiment`
+      - `datasetId`: `stringOptions`, operators `any of` / `none of`
+        Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+    Recovery guidance:
+    - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target`
+    - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided.
+    - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response.
+
+    Examples
+    --------
+    from langfuse.api.unstable.commons import (
+        EvaluationRuleFilter_StringOptions,
+        EvaluationRuleOptionsFilterOperator,
+    )
+
+    EvaluationRuleFilter_StringOptions(
+        column="type",
+        operator=EvaluationRuleOptionsFilterOperator.ANY_OF,
+        value=["GENERATION"],
+    )
+    """
+
+    type: typing.Literal["arrayOptions"] = "arrayOptions"
+    column: str
+    operator: EvaluationRuleArrayOptionsFilterOperator
+    value: typing.List[str]
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
+
+
+class EvaluationRuleFilter_StringObject(UniversalBaseModel):
+    """
+    One filter condition used to decide whether a live-ingested target should be evaluated.
+
+    An evaluation rule can include zero or more filter objects. A target is evaluated only when it satisfies all of them.
+
+    How to build a valid filter object:
+    - Pick the `target` first, because it changes the supported columns.
+    - Pick the filter `type`. That determines which fields are required.
+    - Use `key` only for object filters such as `metadata`.
+    - Use the correct `value` shape for the chosen filter `type`.
+
+    Operator quick reference by filter `type`:
+    - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with`
+    - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `stringOptions`: `any of`, `none of`
+    - `arrayOptions`: `any of`, `none of`, `all of`
+    - `stringObject`: same operators as `string`
+    - `null`: `is null`, `is not null`
+
+    Supported columns by target:
+    - `target=observation`
+      - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT`
+      - `name`: `stringOptions`, operators `any of` / `none of`
+      - `environment`: `stringOptions`, operators `any of` / `none of`
+      - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR`
+      - `version`: `string`
+      - `traceName`: `stringOptions`, operators `any of` / `none of`
+      - `userId`: `string`
+      - `sessionId`: `string`
+      - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `metadata`: `stringObject` with `key`
+      - `parentObservationId`: `null`, operators `is null` / `is not null`
+      - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `toolCalls`: `number`
+    - `target=experiment`
+      - `datasetId`: `stringOptions`, operators `any of` / `none of`
+        Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+    Recovery guidance:
+    - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target`
+    - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided.
+    - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response.
+
+    Examples
+    --------
+    from langfuse.api.unstable.commons import (
+        EvaluationRuleFilter_StringOptions,
+        EvaluationRuleOptionsFilterOperator,
+    )
+
+    EvaluationRuleFilter_StringOptions(
+        column="type",
+        operator=EvaluationRuleOptionsFilterOperator.ANY_OF,
+        value=["GENERATION"],
+    )
+    """
+
+    type: typing.Literal["stringObject"] = "stringObject"
+    column: str
+    key: str
+    operator: EvaluationRuleStringFilterOperator
+    value: str
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
+
+
+class EvaluationRuleFilter_NumberObject(UniversalBaseModel):
+    """
+    One filter condition used to decide whether a live-ingested target should be evaluated.
+
+    An evaluation rule can include zero or more filter objects. A target is evaluated only when it satisfies all of them.
+
+    How to build a valid filter object:
+    - Pick the `target` first, because it changes the supported columns.
+    - Pick the filter `type`. That determines which fields are required.
+    - Use `key` only for object filters such as `metadata`.
+    - Use the correct `value` shape for the chosen filter `type`.
+
+    Operator quick reference by filter `type`:
+    - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with`
+    - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `stringOptions`: `any of`, `none of`
+    - `arrayOptions`: `any of`, `none of`, `all of`
+    - `stringObject`: same operators as `string`
+    - `null`: `is null`, `is not null`
+
+    Supported columns by target:
+    - `target=observation`
+      - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT`
+      - `name`: `stringOptions`, operators `any of` / `none of`
+      - `environment`: `stringOptions`, operators `any of` / `none of`
+      - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR`
+      - `version`: `string`
+      - `traceName`: `stringOptions`, operators `any of` / `none of`
+      - `userId`: `string`
+      - `sessionId`: `string`
+      - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `metadata`: `stringObject` with `key`
+      - `parentObservationId`: `null`, operators `is null` / `is not null`
+      - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `toolCalls`: `number`
+    - `target=experiment`
+      - `datasetId`: `stringOptions`, operators `any of` / `none of`
+        Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+    Recovery guidance:
+    - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target`
+    - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided.
+    - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response.
+
+    Examples
+    --------
+    from langfuse.api.unstable.commons import (
+        EvaluationRuleFilter_StringOptions,
+        EvaluationRuleOptionsFilterOperator,
+    )
+
+    EvaluationRuleFilter_StringOptions(
+        column="type",
+        operator=EvaluationRuleOptionsFilterOperator.ANY_OF,
+        value=["GENERATION"],
+    )
+    """
+
+    type: typing.Literal["numberObject"] = "numberObject"
+    column: str
+    key: str
+    operator: EvaluationRuleNumberFilterOperator
+    value: float
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
+
+
+class EvaluationRuleFilter_Boolean(UniversalBaseModel):
+    """
+    One filter condition used to decide whether a live-ingested target should be evaluated.
+
+    An evaluation rule can include zero or more filter objects. A target is evaluated only when it satisfies all of them.
+
+    How to build a valid filter object:
+    - Pick the `target` first, because it changes the supported columns.
+    - Pick the filter `type`. That determines which fields are required.
+    - Use `key` only for object filters such as `metadata`.
+    - Use the correct `value` shape for the chosen filter `type`.
+
+    Operator quick reference by filter `type`:
+    - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with`
+    - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `stringOptions`: `any of`, `none of`
+    - `arrayOptions`: `any of`, `none of`, `all of`
+    - `stringObject`: same operators as `string`
+    - `null`: `is null`, `is not null`
+
+    Supported columns by target:
+    - `target=observation`
+      - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT`
+      - `name`: `stringOptions`, operators `any of` / `none of`
+      - `environment`: `stringOptions`, operators `any of` / `none of`
+      - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR`
+      - `version`: `string`
+      - `traceName`: `stringOptions`, operators `any of` / `none of`
+      - `userId`: `string`
+      - `sessionId`: `string`
+      - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `metadata`: `stringObject` with `key`
+      - `parentObservationId`: `null`, operators `is null` / `is not null`
+      - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `toolCalls`: `number`
+    - `target=experiment`
+      - `datasetId`: `stringOptions`, operators `any of` / `none of`
+        Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+    Recovery guidance:
+    - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target`
+    - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided.
+    - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response.
+
+    Examples
+    --------
+    from langfuse.api.unstable.commons import (
+        EvaluationRuleFilter_StringOptions,
+        EvaluationRuleOptionsFilterOperator,
+    )
+
+    EvaluationRuleFilter_StringOptions(
+        column="type",
+        operator=EvaluationRuleOptionsFilterOperator.ANY_OF,
+        value=["GENERATION"],
+    )
+    """
+
+    type: typing.Literal["boolean"] = "boolean"
+    column: str
+    operator: EvaluationRuleBooleanFilterOperator
+    value: bool
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
+
+
+class EvaluationRuleFilter_Null(UniversalBaseModel):
+    """
+    One filter condition used to decide whether a live-ingested target should be evaluated.
+
+    An evaluation rule can include zero or more filter objects. A target is evaluated only when it satisfies all of them.
+
+    How to build a valid filter object:
+    - Pick the `target` first, because it changes the supported columns.
+    - Pick the filter `type`. That determines which fields are required.
+    - Use `key` only for object filters such as `metadata`.
+    - Use the correct `value` shape for the chosen filter `type`.
+
+    Operator quick reference by filter `type`:
+    - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with`
+    - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="`
+    - `stringOptions`: `any of`, `none of`
+    - `arrayOptions`: `any of`, `none of`, `all of`
+    - `stringObject`: same operators as `string`
+    - `null`: `is null`, `is not null`
+
+    Supported columns by target:
+    - `target=observation`
+      - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT`
+      - `name`: `stringOptions`, operators `any of` / `none of`
+      - `environment`: `stringOptions`, operators `any of` / `none of`
+      - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR`
+      - `version`: `string`
+      - `traceName`: `stringOptions`, operators `any of` / `none of`
+      - `userId`: `string`
+      - `sessionId`: `string`
+      - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `metadata`: `stringObject` with `key`
+      - `parentObservationId`: `null`, operators `is null` / `is not null`
+      - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of`
+      - `toolCalls`: `number`
+    - `target=experiment`
+      - `datasetId`: `stringOptions`, operators `any of` / `none of`
+        Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+    Recovery guidance:
+    - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target`
+    - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided.
+    - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response.
+
+    Examples
+    --------
+    from langfuse.api.unstable.commons import (
+        EvaluationRuleFilter_StringOptions,
+        EvaluationRuleOptionsFilterOperator,
+    )
+
+    EvaluationRuleFilter_StringOptions(
+        column="type",
+        operator=EvaluationRuleOptionsFilterOperator.ANY_OF,
+        value=["GENERATION"],
+    )
+    """
+
+    type: typing.Literal["null"] = "null"
+    column: str
+    operator: EvaluationRuleNullFilterOperator
+    value: typing.Optional[str] = None
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
+
+
+"""
+from langfuse.api.unstable.commons import (
+    EvaluationRuleFilter_StringOptions,
+    EvaluationRuleOptionsFilterOperator,
+)
+
+EvaluationRuleFilter_StringOptions(
+    column="type",
+    operator=EvaluationRuleOptionsFilterOperator.ANY_OF,
+    value=["GENERATION"],
+)
+"""
+EvaluationRuleFilter = typing_extensions.Annotated[
+    typing.Union[
+        EvaluationRuleFilter_Datetime,
+        EvaluationRuleFilter_String,
+        EvaluationRuleFilter_Number,
+        EvaluationRuleFilter_StringOptions,
+        EvaluationRuleFilter_CategoryOptions,
+        EvaluationRuleFilter_ArrayOptions,
+        EvaluationRuleFilter_StringObject,
+        EvaluationRuleFilter_NumberObject,
+        EvaluationRuleFilter_Boolean,
+        EvaluationRuleFilter_Null,
+    ],
+    pydantic.Field(discriminator="type"),
+]
diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_mapping.py b/langfuse/api/unstable/commons/types/evaluation_rule_mapping.py
new file mode 100644
index 000000000..1c407819c
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/evaluation_rule_mapping.py
@@ -0,0 +1,74 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+import typing_extensions
+from ....core.pydantic_utilities import UniversalBaseModel
+from ....core.serialization import FieldMetadata
+from .evaluation_rule_mapping_source import EvaluationRuleMappingSource
+
+
+class EvaluationRuleMapping(UniversalBaseModel):
+    """
+    Maps one evaluator prompt variable to one source field from the target object.
+
+    How to build a valid mapping list:
+    1. Create the evaluator or fetch it with `GET /evaluators/{id}`.
+    2. Read the evaluator `variables` array.
+    3. Add exactly one mapping object for each variable in that array.
+    4. Use the variable name exactly as returned, without braces such as `{{` or `}}`.
+    5. Choose a `source` that is valid for the selected `target`.
+
+    `jsonPath` is optional. Use it only when the selected source is a JSON object and you want to extract one nested field before inserting it into the evaluator prompt.
+
+    Recovery guidance:
+    - `invalid_variable_mapping`: the variable name is unknown for this evaluator, or the selected `source` is not valid for the chosen `target`
+    - `missing_variable_mapping`: one or more evaluator variables are not mapped yet
+    - `duplicate_variable_mapping`: the same evaluator variable appears more than once
+    - `invalid_json_path`: the JSONPath expression is malformed. Remove it or correct it.
+
+    Examples
+    --------
+    from langfuse.api.unstable.commons import (
+        EvaluationRuleMapping,
+        EvaluationRuleMappingSource,
+    )
+
+    EvaluationRuleMapping(
+        variable="input",
+        source=EvaluationRuleMappingSource.INPUT,
+    )
+    """
+
+    variable: str = pydantic.Field()
+    """
+    Prompt variable name without braces.
+    
+    Example: for the prompt `Judge {{input}} against {{output}}`, use `input` and `output`.
+    """
+
+    source: EvaluationRuleMappingSource = pydantic.Field()
+    """
+    Source field that should populate the prompt variable.
+    
+    Quick reference:
+    - `target=observation`: `input`, `output`, `metadata`
+    - `target=experiment`: `input`, `output`, `metadata`, `expected_output`, `experiment_item_metadata`
+    """
+
+    json_path: typing_extensions.Annotated[
+        typing.Optional[str], FieldMetadata(alias="jsonPath")
+    ] = pydantic.Field(default=None)
+    """
+    Optional JSONPath selector applied to the selected source before it is passed to the evaluator prompt.
+    
+    Requirements:
+    - Must start with `$`
+    - Must be a syntactically valid JSONPath expression
+    - Most useful with `source=metadata`
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_mapping_source.py b/langfuse/api/unstable/commons/types/evaluation_rule_mapping_source.py
new file mode 100644
index 000000000..391c66bbd
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/evaluation_rule_mapping_source.py
@@ -0,0 +1,51 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core import enum
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class EvaluationRuleMappingSource(enum.StrEnum):
+    """
+    Source field used to populate a prompt variable.
+
+    Use these values when mapping evaluator prompt variables to live data.
+
+    Target-specific rules:
+    - `target=observation` supports `input`, `output`, and `metadata`
+    - `target=experiment` supports `input`, `output`, `metadata`, `expected_output`, and `experiment_item_metadata`
+
+    Source semantics:
+    - `input`: the observation or experiment input payload
+    - `output`: the observation or experiment output payload
+    - `metadata`: the metadata object for the target. Combine with `jsonPath` when you need one nested field instead of the whole object.
+    - `expected_output`: the experiment item's expected output. Only valid for `target=experiment`.
+    - `experiment_item_metadata`: the experiment item's metadata object. Only valid for `target=experiment`.
+    """
+
+    INPUT = "input"
+    OUTPUT = "output"
+    METADATA = "metadata"
+    EXPECTED_OUTPUT = "expected_output"
+    EXPERIMENT_ITEM_METADATA = "experiment_item_metadata"
+
+    def visit(
+        self,
+        input: typing.Callable[[], T_Result],
+        output: typing.Callable[[], T_Result],
+        metadata: typing.Callable[[], T_Result],
+        expected_output: typing.Callable[[], T_Result],
+        experiment_item_metadata: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is EvaluationRuleMappingSource.INPUT:
+            return input()
+        if self is EvaluationRuleMappingSource.OUTPUT:
+            return output()
+        if self is EvaluationRuleMappingSource.METADATA:
+            return metadata()
+        if self is EvaluationRuleMappingSource.EXPECTED_OUTPUT:
+            return expected_output()
+        if self is EvaluationRuleMappingSource.EXPERIMENT_ITEM_METADATA:
+            return experiment_item_metadata()
diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_null_filter_operator.py b/langfuse/api/unstable/commons/types/evaluation_rule_null_filter_operator.py
new file mode 100644
index 000000000..833c8406f
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/evaluation_rule_null_filter_operator.py
@@ -0,0 +1,22 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core import enum
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class EvaluationRuleNullFilterOperator(enum.StrEnum):
+    IS_NULL = "is null"
+    IS_NOT_NULL = "is not null"
+
+    def visit(
+        self,
+        is_null: typing.Callable[[], T_Result],
+        is_not_null: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is EvaluationRuleNullFilterOperator.IS_NULL:
+            return is_null()
+        if self is EvaluationRuleNullFilterOperator.IS_NOT_NULL:
+            return is_not_null()
diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_number_filter_operator.py b/langfuse/api/unstable/commons/types/evaluation_rule_number_filter_operator.py
new file mode 100644
index 000000000..927523e04
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/evaluation_rule_number_filter_operator.py
@@ -0,0 +1,34 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core import enum
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class EvaluationRuleNumberFilterOperator(enum.StrEnum):
+    EQUALS = "="
+    GREATER_THAN = ">"
+    LESS_THAN = "<"
+    GREATER_THAN_OR_EQUAL = ">="
+    LESS_THAN_OR_EQUAL = "<="
+
+    def visit(
+        self,
+        equals: typing.Callable[[], T_Result],
+        greater_than: typing.Callable[[], T_Result],
+        less_than: typing.Callable[[], T_Result],
+        greater_than_or_equal: typing.Callable[[], T_Result],
+        less_than_or_equal: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is EvaluationRuleNumberFilterOperator.EQUALS:
+            return equals()
+        if self is EvaluationRuleNumberFilterOperator.GREATER_THAN:
+            return greater_than()
+        if self is EvaluationRuleNumberFilterOperator.LESS_THAN:
+            return less_than()
+        if self is EvaluationRuleNumberFilterOperator.GREATER_THAN_OR_EQUAL:
+            return greater_than_or_equal()
+        if self is EvaluationRuleNumberFilterOperator.LESS_THAN_OR_EQUAL:
+            return less_than_or_equal()
diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_options_filter_operator.py b/langfuse/api/unstable/commons/types/evaluation_rule_options_filter_operator.py
new file mode 100644
index 000000000..01cd13ea3
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/evaluation_rule_options_filter_operator.py
@@ -0,0 +1,22 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core import enum
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class EvaluationRuleOptionsFilterOperator(enum.StrEnum):
+    ANY_OF = "any of"
+    NONE_OF = "none of"
+
+    def visit(
+        self,
+        any_of: typing.Callable[[], T_Result],
+        none_of: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is EvaluationRuleOptionsFilterOperator.ANY_OF:
+            return any_of()
+        if self is EvaluationRuleOptionsFilterOperator.NONE_OF:
+            return none_of()
diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_status.py b/langfuse/api/unstable/commons/types/evaluation_rule_status.py
new file mode 100644
index 000000000..4a313a962
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/evaluation_rule_status.py
@@ -0,0 +1,34 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core import enum
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class EvaluationRuleStatus(enum.StrEnum):
+    """
+    Effective runtime status of the evaluation rule.
+
+    - `active`: enabled and currently runnable.
+    - `inactive`: disabled by configuration.
+    - `paused`: enabled, but Langfuse has blocked execution until the underlying issue is resolved.
+    """
+
+    ACTIVE = "active"
+    INACTIVE = "inactive"
+    PAUSED = "paused"
+
+    def visit(
+        self,
+        active: typing.Callable[[], T_Result],
+        inactive: typing.Callable[[], T_Result],
+        paused: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is EvaluationRuleStatus.ACTIVE:
+            return active()
+        if self is EvaluationRuleStatus.INACTIVE:
+            return inactive()
+        if self is EvaluationRuleStatus.PAUSED:
+            return paused()
diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_string_filter_operator.py b/langfuse/api/unstable/commons/types/evaluation_rule_string_filter_operator.py
new file mode 100644
index 000000000..9955172b9
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/evaluation_rule_string_filter_operator.py
@@ -0,0 +1,34 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core import enum
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class EvaluationRuleStringFilterOperator(enum.StrEnum):
+    EQUALS = "="
+    CONTAINS = "contains"
+    DOES_NOT_CONTAIN = "does not contain"
+    STARTS_WITH = "starts with"
+    ENDS_WITH = "ends with"
+
+    def visit(
+        self,
+        equals: typing.Callable[[], T_Result],
+        contains: typing.Callable[[], T_Result],
+        does_not_contain: typing.Callable[[], T_Result],
+        starts_with: typing.Callable[[], T_Result],
+        ends_with: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is EvaluationRuleStringFilterOperator.EQUALS:
+            return equals()
+        if self is EvaluationRuleStringFilterOperator.CONTAINS:
+            return contains()
+        if self is EvaluationRuleStringFilterOperator.DOES_NOT_CONTAIN:
+            return does_not_contain()
+        if self is EvaluationRuleStringFilterOperator.STARTS_WITH:
+            return starts_with()
+        if self is EvaluationRuleStringFilterOperator.ENDS_WITH:
+            return ends_with()
diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_target.py b/langfuse/api/unstable/commons/types/evaluation_rule_target.py
new file mode 100644
index 000000000..186aa461c
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/evaluation_rule_target.py
@@ -0,0 +1,33 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core import enum
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class EvaluationRuleTarget(enum.StrEnum):
+    """
+    The ingestion object type that should trigger evaluation runs.
+
+    Choose the target first, because it changes both the valid filter columns and the valid variable-mapping sources:
+    - `observation` evaluates live-ingested observations such as generations, spans, and events.
+      It supports mapping from `input`, `output`, and `metadata`.
+    - `experiment` evaluates live experiment executions and can additionally map `expected_output` and `experiment_item_metadata`.
+      It currently supports filtering by `datasetId`.
+      Discover valid dataset IDs with `GET /api/public/v2/datasets`, then use the returned dataset `id` values in your filter.
+    """
+
+    OBSERVATION = "observation"
+    EXPERIMENT = "experiment"
+
+    def visit(
+        self,
+        observation: typing.Callable[[], T_Result],
+        experiment: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is EvaluationRuleTarget.OBSERVATION:
+            return observation()
+        if self is EvaluationRuleTarget.EXPERIMENT:
+            return experiment()
diff --git a/langfuse/api/unstable/commons/types/evaluator_model_config.py b/langfuse/api/unstable/commons/types/evaluator_model_config.py
new file mode 100644
index 000000000..5473cca8f
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/evaluator_model_config.py
@@ -0,0 +1,46 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+
+
+class EvaluatorModelConfig(UniversalBaseModel):
+    """
+    Optional explicit model configuration for an evaluator.
+
+    If omitted, Langfuse uses the project's default evaluation model.
+    If provided, the model must be available to the project when the evaluator or evaluation rule is enabled.
+
+    To discover valid configured `provider` values for a project, call `GET /api/public/llm-connections` and read the `provider` field from the returned connections.
+    Use a `provider` value that matches one of the connections already configured in the same project.
+
+    Recovery guidance:
+    - If evaluator creation returns `422` with `code=evaluator_preflight_failed`, either provide a valid explicit `modelConfig` here or configure the project's default evaluation model, then retry the same request.
+
+    Examples
+    --------
+    from langfuse.unstable.commons import EvaluatorModelConfig
+
+    EvaluatorModelConfig(
+        provider="openai",
+        model="gpt-4.1-mini",
+    )
+    """
+
+    provider: str = pydantic.Field()
+    """
+    Provider identifier to use for this evaluator, for example `openai` or `anthropic`.
+    
+    To discover valid values for the current project, call `GET /api/public/llm-connections` and use one of the returned `provider` values.
+    """
+
+    model: str = pydantic.Field()
+    """
+    Model identifier exposed by the provider, for example `gpt-4.1-mini`.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
diff --git a/langfuse/api/unstable/commons/types/evaluator_output_data_type.py b/langfuse/api/unstable/commons/types/evaluator_output_data_type.py
new file mode 100644
index 000000000..a6c309868
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/evaluator_output_data_type.py
@@ -0,0 +1,35 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core import enum
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class EvaluatorOutputDataType(enum.StrEnum):
+    """
+    Structured score type returned by an evaluator.
+
+    This controls the type of score value Langfuse stores for evaluation results:
+    - `NUMERIC`: a numeric score such as `0.82`
+    - `BOOLEAN`: a boolean score such as `true`
+    - `CATEGORICAL`: one or more category labels from a fixed list
+    """
+
+    NUMERIC = "NUMERIC"
+    BOOLEAN = "BOOLEAN"
+    CATEGORICAL = "CATEGORICAL"
+
+    def visit(
+        self,
+        numeric: typing.Callable[[], T_Result],
+        boolean: typing.Callable[[], T_Result],
+        categorical: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is EvaluatorOutputDataType.NUMERIC:
+            return numeric()
+        if self is EvaluatorOutputDataType.BOOLEAN:
+            return boolean()
+        if self is EvaluatorOutputDataType.CATEGORICAL:
+            return categorical()
diff --git a/langfuse/api/unstable/commons/types/evaluator_output_definition.py b/langfuse/api/unstable/commons/types/evaluator_output_definition.py
new file mode 100644
index 000000000..f545a19a8
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/evaluator_output_definition.py
@@ -0,0 +1,161 @@
+# This file was auto-generated by Fern from our API Definition.
+
+from __future__ import annotations
+
+import typing
+
+import pydantic
+import typing_extensions
+from ....core.pydantic_utilities import UniversalBaseModel
+from ....core.serialization import FieldMetadata
+from .evaluator_output_field_definition import EvaluatorOutputFieldDefinition
+from .public_categorical_evaluator_output_score_definition import (
+    PublicCategoricalEvaluatorOutputScoreDefinition,
+)
+
+
+class EvaluatorOutputDefinition_Numeric(UniversalBaseModel):
+    """
+    Structured output definition to send when creating an evaluator.
+
+    Agent guidance:
+    - `dataType` is required.
+    - Do not send `version`; that is an internal storage detail and is not part of the public request contract.
+    - For `NUMERIC` and `BOOLEAN`, provide `reasoning.description` and `score.description`.
+    - For `CATEGORICAL`, also provide `score.categories` and `score.shouldAllowMultipleMatches`.
+
+    Examples
+    --------
+    from langfuse.unstable.commons import (
+        EvaluatorOutputDataType,
+        EvaluatorOutputDefinition_Numeric,
+        EvaluatorOutputFieldDefinition,
+    )
+
+    EvaluatorOutputDefinition_Numeric(
+        data_type=EvaluatorOutputDataType.NUMERIC,
+        reasoning=EvaluatorOutputFieldDefinition(
+            description="Explain why the answer is correct or incorrect.",
+        ),
+        score=EvaluatorOutputFieldDefinition(
+            description="Return a score between 0 and 1.",
+        ),
+    )
+    """
+
+    data_type: typing_extensions.Annotated[
+        typing.Literal["NUMERIC"], FieldMetadata(alias="dataType")
+    ] = "NUMERIC"
+    reasoning: EvaluatorOutputFieldDefinition
+    score: EvaluatorOutputFieldDefinition
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
+
+
+class EvaluatorOutputDefinition_Boolean(UniversalBaseModel):
+    """
+    Structured output definition to send when creating an evaluator.
+
+    Agent guidance:
+    - `dataType` is required.
+    - Do not send `version`; that is an internal storage detail and is not part of the public request contract.
+    - For `NUMERIC` and `BOOLEAN`, provide `reasoning.description` and `score.description`.
+    - For `CATEGORICAL`, also provide `score.categories` and `score.shouldAllowMultipleMatches`.
+
+    Examples
+    --------
+    from langfuse.unstable.commons import (
+        EvaluatorOutputDataType,
+        EvaluatorOutputDefinition_Boolean,
+        EvaluatorOutputFieldDefinition,
+    )
+
+    EvaluatorOutputDefinition_Boolean(
+        data_type=EvaluatorOutputDataType.BOOLEAN,
+        reasoning=EvaluatorOutputFieldDefinition(
+            description="Explain why the answer is correct or incorrect.",
+        ),
+        score=EvaluatorOutputFieldDefinition(
+            description="Return true if the answer is correct, otherwise false.",
+        ),
+    )
+    """
+
+    data_type: typing_extensions.Annotated[
+        typing.Literal["BOOLEAN"], FieldMetadata(alias="dataType")
+    ] = "BOOLEAN"
+    reasoning: EvaluatorOutputFieldDefinition
+    score: EvaluatorOutputFieldDefinition
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
+
+
+class EvaluatorOutputDefinition_Categorical(UniversalBaseModel):
+    """
+    Structured output definition to send when creating an evaluator.
+
+    Agent guidance:
+    - `dataType` is required.
+    - Do not send `version`; that is an internal storage detail and is not part of the public request contract.
+    - For `NUMERIC` and `BOOLEAN`, provide `reasoning.description` and `score.description`.
+    - For `CATEGORICAL`, also provide `score.categories` and `score.shouldAllowMultipleMatches`.
+
+    Examples
+    --------
+    from langfuse.unstable.commons import (
+        EvaluatorOutputDataType,
+        EvaluatorOutputDefinition_Numeric,
+        EvaluatorOutputFieldDefinition,
+    )
+
+    EvaluatorOutputDefinition_Numeric(
+        data_type=EvaluatorOutputDataType.NUMERIC,
+        reasoning=EvaluatorOutputFieldDefinition(
+            description="Explain why the answer is correct or incorrect.",
+        ),
+        score=EvaluatorOutputFieldDefinition(
+            description="Return a score between 0 and 1.",
+        ),
+    )
+    """
+
+    data_type: typing_extensions.Annotated[
+        typing.Literal["CATEGORICAL"], FieldMetadata(alias="dataType")
+    ] = "CATEGORICAL"
+    reasoning: EvaluatorOutputFieldDefinition
+    score: PublicCategoricalEvaluatorOutputScoreDefinition
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
+
+
+"""
+from langfuse.unstable.commons import (
+    EvaluatorOutputDataType,
+    EvaluatorOutputDefinition_Numeric,
+    EvaluatorOutputFieldDefinition,
+)
+
+EvaluatorOutputDefinition_Numeric(
+    data_type=EvaluatorOutputDataType.NUMERIC,
+    reasoning=EvaluatorOutputFieldDefinition(
+        description="Explain why the answer is correct or incorrect.",
+    ),
+    score=EvaluatorOutputFieldDefinition(
+        description="Return a score between 0 and 1.",
+    ),
+)
+"""
+EvaluatorOutputDefinition = typing_extensions.Annotated[
+    typing.Union[
+        EvaluatorOutputDefinition_Numeric,
+        EvaluatorOutputDefinition_Boolean,
+        EvaluatorOutputDefinition_Categorical,
+    ],
+    pydantic.Field(discriminator="data_type"),
+]
diff --git a/langfuse/api/unstable/commons/types/evaluator_output_field_definition.py b/langfuse/api/unstable/commons/types/evaluator_output_field_definition.py
new file mode 100644
index 000000000..419610d0a
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/evaluator_output_field_definition.py
@@ -0,0 +1,17 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+
+
+class EvaluatorOutputFieldDefinition(UniversalBaseModel):
+    description: str = pydantic.Field()
+    """
+    Human-readable instructions for what the evaluator should return in this field.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
diff --git a/langfuse/api/unstable/commons/types/evaluator_scope.py b/langfuse/api/unstable/commons/types/evaluator_scope.py
new file mode 100644
index 000000000..7ce796418
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/evaluator_scope.py
@@ -0,0 +1,29 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core import enum
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class EvaluatorScope(enum.StrEnum):
+    """
+    Where an evaluator comes from.
+
+    - `project`: created in your project
+    - `managed`: provided by Langfuse
+    """
+
+    PROJECT = "project"
+    MANAGED = "managed"
+
+    def visit(
+        self,
+        project: typing.Callable[[], T_Result],
+        managed: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is EvaluatorScope.PROJECT:
+            return project()
+        if self is EvaluatorScope.MANAGED:
+            return managed()
diff --git a/langfuse/api/unstable/commons/types/evaluator_type.py b/langfuse/api/unstable/commons/types/evaluator_type.py
new file mode 100644
index 000000000..d411d6111
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/evaluator_type.py
@@ -0,0 +1,21 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core import enum
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class EvaluatorType(enum.StrEnum):
+    """
+    The evaluator engine type.
+
+    The unstable public API currently supports only LLM-as-a-judge evaluators.
+    """
+
+    LLM_AS_JUDGE = "llm_as_judge"
+
+    def visit(self, llm_as_judge: typing.Callable[[], T_Result]) -> T_Result:
+        if self is EvaluatorType.LLM_AS_JUDGE:
+            return llm_as_judge()
diff --git a/langfuse/api/unstable/commons/types/null_evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/null_evaluation_rule_filter.py
new file mode 100644
index 000000000..d224d7590
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/null_evaluation_rule_filter.py
@@ -0,0 +1,24 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+from .evaluation_rule_null_filter_operator import EvaluationRuleNullFilterOperator
+
+
+class NullEvaluationRuleFilter(UniversalBaseModel):
+    column: str = pydantic.Field()
+    """
+    Column to filter on. In the unstable public API this is currently `parentObservationId`.
+    """
+
+    operator: EvaluationRuleNullFilterOperator
+    value: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Ignored placeholder value. Clients may omit it or send an empty string.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
diff --git a/langfuse/api/unstable/commons/types/number_evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/number_evaluation_rule_filter.py
new file mode 100644
index 000000000..f9c489291
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/number_evaluation_rule_filter.py
@@ -0,0 +1,21 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+from .evaluation_rule_number_filter_operator import EvaluationRuleNumberFilterOperator
+
+
+class NumberEvaluationRuleFilter(UniversalBaseModel):
+    column: str = pydantic.Field()
+    """
+    Column to filter on.
+    """
+
+    operator: EvaluationRuleNumberFilterOperator
+    value: float
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
diff --git a/langfuse/api/unstable/commons/types/number_object_evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/number_object_evaluation_rule_filter.py
new file mode 100644
index 000000000..fd9462174
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/number_object_evaluation_rule_filter.py
@@ -0,0 +1,26 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+from .evaluation_rule_number_filter_operator import EvaluationRuleNumberFilterOperator
+
+
+class NumberObjectEvaluationRuleFilter(UniversalBaseModel):
+    column: str = pydantic.Field()
+    """
+    Object-valued column to filter on.
+    """
+
+    key: str = pydantic.Field()
+    """
+    Key inside the object-valued column to filter on.
+    """
+
+    operator: EvaluationRuleNumberFilterOperator
+    value: float
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
diff --git a/langfuse/api/unstable/commons/types/public_boolean_evaluator_output_definition.py b/langfuse/api/unstable/commons/types/public_boolean_evaluator_output_definition.py
new file mode 100644
index 000000000..7baaf209a
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/public_boolean_evaluator_output_definition.py
@@ -0,0 +1,26 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+import typing_extensions
+from ....core.pydantic_utilities import UniversalBaseModel
+from ....core.serialization import FieldMetadata
+from .evaluator_output_data_type import EvaluatorOutputDataType
+from .evaluator_output_field_definition import EvaluatorOutputFieldDefinition
+
+
+class PublicBooleanEvaluatorOutputDefinition(UniversalBaseModel):
+    data_type: typing_extensions.Annotated[
+        EvaluatorOutputDataType, FieldMetadata(alias="dataType")
+    ] = pydantic.Field()
+    """
+    Always `BOOLEAN`.
+    """
+
+    reasoning: EvaluatorOutputFieldDefinition
+    score: EvaluatorOutputFieldDefinition
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
diff --git a/langfuse/api/unstable/commons/types/public_categorical_evaluator_output_definition.py b/langfuse/api/unstable/commons/types/public_categorical_evaluator_output_definition.py
new file mode 100644
index 000000000..30d4673bb
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/public_categorical_evaluator_output_definition.py
@@ -0,0 +1,29 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+import typing_extensions
+from ....core.pydantic_utilities import UniversalBaseModel
+from ....core.serialization import FieldMetadata
+from .evaluator_output_data_type import EvaluatorOutputDataType
+from .evaluator_output_field_definition import EvaluatorOutputFieldDefinition
+from .public_categorical_evaluator_output_score_definition import (
+    PublicCategoricalEvaluatorOutputScoreDefinition,
+)
+
+
+class PublicCategoricalEvaluatorOutputDefinition(UniversalBaseModel):
+    data_type: typing_extensions.Annotated[
+        EvaluatorOutputDataType, FieldMetadata(alias="dataType")
+    ] = pydantic.Field()
+    """
+    Always `CATEGORICAL`.
+    """
+
+    reasoning: EvaluatorOutputFieldDefinition
+    score: PublicCategoricalEvaluatorOutputScoreDefinition
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
diff --git a/langfuse/api/unstable/commons/types/public_categorical_evaluator_output_score_definition.py b/langfuse/api/unstable/commons/types/public_categorical_evaluator_output_score_definition.py
new file mode 100644
index 000000000..81deadb93
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/public_categorical_evaluator_output_score_definition.py
@@ -0,0 +1,20 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+import typing_extensions
+from ....core.pydantic_utilities import UniversalBaseModel
+from ....core.serialization import FieldMetadata
+
+
+class PublicCategoricalEvaluatorOutputScoreDefinition(UniversalBaseModel):
+    description: str
+    categories: typing.List[str]
+    should_allow_multiple_matches: typing_extensions.Annotated[
+        bool, FieldMetadata(alias="shouldAllowMultipleMatches")
+    ]
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
diff --git a/langfuse/api/unstable/commons/types/public_evaluator_output_definition.py b/langfuse/api/unstable/commons/types/public_evaluator_output_definition.py
new file mode 100644
index 000000000..43c7aa9ba
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/public_evaluator_output_definition.py
@@ -0,0 +1,167 @@
+# This file was auto-generated by Fern from our API Definition.
+
+from __future__ import annotations
+
+import typing
+
+import pydantic
+import typing_extensions
+from ....core.pydantic_utilities import UniversalBaseModel
+from ....core.serialization import FieldMetadata
+from .evaluator_output_field_definition import EvaluatorOutputFieldDefinition
+from .public_categorical_evaluator_output_score_definition import (
+    PublicCategoricalEvaluatorOutputScoreDefinition,
+)
+
+
+class PublicEvaluatorOutputDefinition_Numeric(UniversalBaseModel):
+    """
+    Evaluator output definition returned by the public API.
+
+    This response always includes `dataType` and never includes an internal output-definition `version`.
+    Legacy stored evaluator definitions are normalized into this shape before they are returned.
+
+    Use this response shape when deciding how to interpret future evaluation scores:
+    - `NUMERIC`: expect numeric score values
+    - `BOOLEAN`: expect `true` / `false`
+    - `CATEGORICAL`: expect one or more values from `score.categories`
+
+    Examples
+    --------
+    from langfuse.unstable.commons import (
+        EvaluatorOutputDataType,
+        EvaluatorOutputFieldDefinition,
+        PublicEvaluatorOutputDefinition_Numeric,
+    )
+
+    PublicEvaluatorOutputDefinition_Numeric(
+        data_type=EvaluatorOutputDataType.NUMERIC,
+        reasoning=EvaluatorOutputFieldDefinition(
+            description="Explain why the answer is correct or incorrect.",
+        ),
+        score=EvaluatorOutputFieldDefinition(
+            description="Return a score between 0 and 1.",
+        ),
+    )
+    """
+
+    data_type: typing_extensions.Annotated[
+        typing.Literal["NUMERIC"], FieldMetadata(alias="dataType")
+    ] = "NUMERIC"
+    reasoning: EvaluatorOutputFieldDefinition
+    score: EvaluatorOutputFieldDefinition
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
+
+
+class PublicEvaluatorOutputDefinition_Boolean(UniversalBaseModel):
+    """
+    Evaluator output definition returned by the public API.
+
+    This response always includes `dataType` and never includes an internal output-definition `version`.
+    Legacy stored evaluator definitions are normalized into this shape before they are returned.
+
+    Use this response shape when deciding how to interpret future evaluation scores:
+    - `NUMERIC`: expect numeric score values
+    - `BOOLEAN`: expect `true` / `false`
+    - `CATEGORICAL`: expect one or more values from `score.categories`
+
+    Examples
+    --------
+    from langfuse.unstable.commons import (
+        EvaluatorOutputDataType,
+        EvaluatorOutputFieldDefinition,
+        PublicEvaluatorOutputDefinition_Boolean,
+    )
+
+    PublicEvaluatorOutputDefinition_Boolean(
+        data_type=EvaluatorOutputDataType.BOOLEAN,
+        reasoning=EvaluatorOutputFieldDefinition(
+            description="Explain why the answer is correct or incorrect.",
+        ),
+        score=EvaluatorOutputFieldDefinition(
+            description="Return true if the answer is correct, otherwise false.",
+        ),
+    )
+    """
+
+    data_type: typing_extensions.Annotated[
+        typing.Literal["BOOLEAN"], FieldMetadata(alias="dataType")
+    ] = "BOOLEAN"
+    reasoning: EvaluatorOutputFieldDefinition
+    score: EvaluatorOutputFieldDefinition
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
+
+
+class PublicEvaluatorOutputDefinition_Categorical(UniversalBaseModel):
+    """
+    Evaluator output definition returned by the public API.
+
+    This response always includes `dataType` and never includes an internal output-definition `version`.
+    Legacy stored evaluator definitions are normalized into this shape before they are returned.
+
+    Use this response shape when deciding how to interpret future evaluation scores:
+    - `NUMERIC`: expect numeric score values
+    - `BOOLEAN`: expect `true` / `false`
+    - `CATEGORICAL`: expect one or more values from `score.categories`
+
+    Examples
+    --------
+    from langfuse.unstable.commons import (
+        EvaluatorOutputDataType,
+        EvaluatorOutputFieldDefinition,
+        PublicEvaluatorOutputDefinition_Numeric,
+    )
+
+    PublicEvaluatorOutputDefinition_Numeric(
+        data_type=EvaluatorOutputDataType.NUMERIC,
+        reasoning=EvaluatorOutputFieldDefinition(
+            description="Explain why the answer is correct or incorrect.",
+        ),
+        score=EvaluatorOutputFieldDefinition(
+            description="Return a score between 0 and 1.",
+        ),
+    )
+    """
+
+    data_type: typing_extensions.Annotated[
+        typing.Literal["CATEGORICAL"], FieldMetadata(alias="dataType")
+    ] = "CATEGORICAL"
+    reasoning: EvaluatorOutputFieldDefinition
+    score: PublicCategoricalEvaluatorOutputScoreDefinition
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
+
+
+"""
+from langfuse.unstable.commons import (
+    EvaluatorOutputDataType,
+    EvaluatorOutputFieldDefinition,
+    PublicEvaluatorOutputDefinition_Numeric,
+)
+
+PublicEvaluatorOutputDefinition_Numeric(
+    data_type=EvaluatorOutputDataType.NUMERIC,
+    reasoning=EvaluatorOutputFieldDefinition(
+        description="Explain why the answer is correct or incorrect.",
+    ),
+    score=EvaluatorOutputFieldDefinition(
+        description="Return a score between 0 and 1.",
+    ),
+)
+"""
+PublicEvaluatorOutputDefinition = typing_extensions.Annotated[
+    typing.Union[
+        PublicEvaluatorOutputDefinition_Numeric,
+        PublicEvaluatorOutputDefinition_Boolean,
+        PublicEvaluatorOutputDefinition_Categorical,
+    ],
+    pydantic.Field(discriminator="data_type"),
+]
diff --git a/langfuse/api/unstable/commons/types/public_numeric_evaluator_output_definition.py b/langfuse/api/unstable/commons/types/public_numeric_evaluator_output_definition.py
new file mode 100644
index 000000000..68987d2ff
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/public_numeric_evaluator_output_definition.py
@@ -0,0 +1,26 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+import typing_extensions
+from ....core.pydantic_utilities import UniversalBaseModel
+from ....core.serialization import FieldMetadata
+from .evaluator_output_data_type import EvaluatorOutputDataType
+from .evaluator_output_field_definition import EvaluatorOutputFieldDefinition
+
+
+class PublicNumericEvaluatorOutputDefinition(UniversalBaseModel):
+    data_type: typing_extensions.Annotated[
+        EvaluatorOutputDataType, FieldMetadata(alias="dataType")
+    ] = pydantic.Field()
+    """
+    Always `NUMERIC`.
+    """
+
+    reasoning: EvaluatorOutputFieldDefinition
+    score: EvaluatorOutputFieldDefinition
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
diff --git a/langfuse/api/unstable/commons/types/string_evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/string_evaluation_rule_filter.py
new file mode 100644
index 000000000..bd9332092
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/string_evaluation_rule_filter.py
@@ -0,0 +1,21 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+from .evaluation_rule_string_filter_operator import EvaluationRuleStringFilterOperator
+
+
+class StringEvaluationRuleFilter(UniversalBaseModel):
+    column: str = pydantic.Field()
+    """
+    Column to filter on.
+    """
+
+    operator: EvaluationRuleStringFilterOperator
+    value: str
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
diff --git a/langfuse/api/unstable/commons/types/string_object_evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/string_object_evaluation_rule_filter.py
new file mode 100644
index 000000000..6c287aad6
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/string_object_evaluation_rule_filter.py
@@ -0,0 +1,26 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+from .evaluation_rule_string_filter_operator import EvaluationRuleStringFilterOperator
+
+
+class StringObjectEvaluationRuleFilter(UniversalBaseModel):
+    column: str = pydantic.Field()
+    """
+    Object-valued column to filter on. In the unstable public API this is currently `metadata`.
+    """
+
+    key: str = pydantic.Field()
+    """
+    Top-level key inside the object-valued column to filter on.
+    """
+
+    operator: EvaluationRuleStringFilterOperator
+    value: str
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
diff --git a/langfuse/api/unstable/commons/types/string_options_evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/string_options_evaluation_rule_filter.py
new file mode 100644
index 000000000..a830e5ad9
--- /dev/null
+++ b/langfuse/api/unstable/commons/types/string_options_evaluation_rule_filter.py
@@ -0,0 +1,24 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+from .evaluation_rule_options_filter_operator import EvaluationRuleOptionsFilterOperator
+
+
+class StringOptionsEvaluationRuleFilter(UniversalBaseModel):
+    column: str = pydantic.Field()
+    """
+    Column to filter on.
+    """
+
+    operator: EvaluationRuleOptionsFilterOperator
+    value: typing.List[str] = pydantic.Field()
+    """
+    One or more allowed string values.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )
diff --git a/langfuse/api/unstable/errors/__init__.py b/langfuse/api/unstable/errors/__init__.py
new file mode 100644
index 000000000..42f230c41
--- /dev/null
+++ b/langfuse/api/unstable/errors/__init__.py
@@ -0,0 +1,84 @@
+# This file was auto-generated by Fern from our API Definition.
+
+# isort: skip_file
+
+import typing
+from importlib import import_module
+
+if typing.TYPE_CHECKING:
+    from .types import (
+        PublicApiError,
+        PublicApiErrorCode,
+        PublicApiErrorDetails,
+        PublicApiValidationIssue,
+    )
+    from .errors import (
+        AccessDeniedError,
+        BadRequestError,
+        ConflictError,
+        InternalServerError,
+        MethodNotAllowedError,
+        NotFoundError,
+        TooManyRequestsError,
+        UnauthorizedError,
+        UnprocessableContentError,
+    )
+_dynamic_imports: typing.Dict[str, str] = {
+    "AccessDeniedError": ".errors",
+    "BadRequestError": ".errors",
+    "ConflictError": ".errors",
+    "InternalServerError": ".errors",
+    "MethodNotAllowedError": ".errors",
+    "NotFoundError": ".errors",
+    "PublicApiError": ".types",
+    "PublicApiErrorCode": ".types",
+    "PublicApiErrorDetails": ".types",
+    "PublicApiValidationIssue": ".types",
+    "TooManyRequestsError": ".errors",
+    "UnauthorizedError": ".errors",
+    "UnprocessableContentError": ".errors",
+}
+
+
+def __getattr__(attr_name: str) -> typing.Any:
+    module_name = _dynamic_imports.get(attr_name)
+    if module_name is None:
+        raise AttributeError(
+            f"No {attr_name} found in _dynamic_imports for module name -> {__name__}"
+        )
+    try:
+        module = import_module(module_name, __package__)
+        if module_name == f".{attr_name}":
+            return module
+        else:
+            return getattr(module, attr_name)
+    except ImportError as e:
+        raise ImportError(
+            f"Failed to import {attr_name} from {module_name}: {e}"
+        ) from e
+    except AttributeError as e:
+        raise AttributeError(
+            f"Failed to get {attr_name} from {module_name}: {e}"
+        ) from e
+
+
+def __dir__():
+    lazy_attrs = list(_dynamic_imports.keys())
+    return sorted(lazy_attrs)
+
+
+__all__ = [
+    "AccessDeniedError",
+    "BadRequestError",
+    "ConflictError",
+    "InternalServerError",
+    "MethodNotAllowedError",
+    "NotFoundError",
+    "PublicApiError",
+    "PublicApiErrorCode",
+    "PublicApiErrorDetails",
+    "PublicApiValidationIssue",
+    "TooManyRequestsError",
+    "UnauthorizedError",
+    "UnprocessableContentError",
+]
diff --git a/langfuse/api/unstable/errors/errors/__init__.py b/langfuse/api/unstable/errors/errors/__init__.py
new file mode 100644
index 000000000..510e3beb1
--- /dev/null
+++ b/langfuse/api/unstable/errors/errors/__init__.py
@@ -0,0 +1,68 @@
+# This file was auto-generated by Fern from our API Definition.
+
+# isort: skip_file
+
+import typing
+from importlib import import_module
+
+if typing.TYPE_CHECKING:
+    from .access_denied_error import AccessDeniedError
+    from .bad_request_error import BadRequestError
+    from .conflict_error import ConflictError
+    from .internal_server_error import InternalServerError
+    from .method_not_allowed_error import MethodNotAllowedError
+    from .not_found_error import NotFoundError
+    from .too_many_requests_error import TooManyRequestsError
+    from .unauthorized_error import UnauthorizedError
+    from .unprocessable_content_error import UnprocessableContentError
+_dynamic_imports: typing.Dict[str, str] = {  # exported name -> relative module it is imported from
+    "AccessDeniedError": ".access_denied_error",
+    "BadRequestError": ".bad_request_error",
+    "ConflictError": ".conflict_error",
+    "InternalServerError": ".internal_server_error",
+    "MethodNotAllowedError": ".method_not_allowed_error",
+    "NotFoundError": ".not_found_error",
+    "TooManyRequestsError": ".too_many_requests_error",
+    "UnauthorizedError": ".unauthorized_error",
+    "UnprocessableContentError": ".unprocessable_content_error",
+}
+
+
+def __getattr__(attr_name: str) -> typing.Any:
+    """Lazily import and return `attr_name` (PEP 562 module `__getattr__`)."""
+    module_name = _dynamic_imports.get(attr_name)
+    if module_name is None:
+        raise AttributeError(
+            f"No {attr_name} found in _dynamic_imports for module name -> {__name__}"
+        )
+    try:
+        loaded = import_module(module_name, __package__)
+        # An entry that points at a submodule named like the attribute exposes the
+        # module object itself; every other entry exposes a member of that module.
+        return loaded if module_name == f".{attr_name}" else getattr(loaded, attr_name)
+    except ImportError as e:
+        raise ImportError(
+            f"Failed to import {attr_name} from {module_name}: {e}"
+        ) from e
+    except AttributeError as e:
+        raise AttributeError(
+            f"Failed to get {attr_name} from {module_name}: {e}"
+        ) from e
+
+
+def __dir__():
+    # sorted() over the mapping iterates its keys, i.e. the lazily exported names.
+    return sorted(_dynamic_imports)
+
+
+__all__ = [  # names exported by a star-import; mirrors the keys of _dynamic_imports
+    "AccessDeniedError",
+    "BadRequestError",
+    "ConflictError",
+    "InternalServerError",
+    "MethodNotAllowedError",
+    "NotFoundError",
+    "TooManyRequestsError",
+    "UnauthorizedError",
+    "UnprocessableContentError",
+]
diff --git a/langfuse/api/unstable/errors/errors/access_denied_error.py b/langfuse/api/unstable/errors/errors/access_denied_error.py
new file mode 100644
index 000000000..6e07b4c79
--- /dev/null
+++ b/langfuse/api/unstable/errors/errors/access_denied_error.py
@@ -0,0 +1,19 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core.api_error import ApiError
+from ..types.public_api_error import PublicApiError
+
+
+class AccessDeniedError(ApiError):
+    """`ApiError` subclass whose HTTP status code is fixed to 403."""
+
+    def __init__(
+        self,
+        body: PublicApiError,
+        headers: typing.Optional[typing.Dict[str, str]] = None,
+    ):
+        # `body` is the structured `PublicApiError` payload; `headers` may be
+        # omitted. Both are handed to `ApiError` untouched.
+        super().__init__(body=body, headers=headers, status_code=403)
diff --git a/langfuse/api/unstable/errors/errors/bad_request_error.py b/langfuse/api/unstable/errors/errors/bad_request_error.py
new file mode 100644
index 000000000..7ba4c1a00
--- /dev/null
+++ b/langfuse/api/unstable/errors/errors/bad_request_error.py
@@ -0,0 +1,19 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core.api_error import ApiError
+from ..types.public_api_error import PublicApiError
+
+
+class BadRequestError(ApiError):
+    """`ApiError` subclass whose HTTP status code is fixed to 400."""
+
+    def __init__(
+        self,
+        body: PublicApiError,
+        headers: typing.Optional[typing.Dict[str, str]] = None,
+    ):
+        # `body` is the structured `PublicApiError` payload; `headers` may be
+        # omitted. Both are handed to `ApiError` untouched.
+        super().__init__(body=body, headers=headers, status_code=400)
diff --git a/langfuse/api/unstable/errors/errors/conflict_error.py b/langfuse/api/unstable/errors/errors/conflict_error.py
new file mode 100644
index 000000000..3630eec67
--- /dev/null
+++ b/langfuse/api/unstable/errors/errors/conflict_error.py
@@ -0,0 +1,19 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core.api_error import ApiError
+from ..types.public_api_error import PublicApiError
+
+
+class ConflictError(ApiError):
+    """`ApiError` subclass whose HTTP status code is fixed to 409."""
+
+    def __init__(
+        self,
+        body: PublicApiError,
+        headers: typing.Optional[typing.Dict[str, str]] = None,
+    ):
+        # `body` is the structured `PublicApiError` payload; `headers` may be
+        # omitted. Both are handed to `ApiError` untouched.
+        super().__init__(body=body, headers=headers, status_code=409)
diff --git a/langfuse/api/unstable/errors/errors/internal_server_error.py b/langfuse/api/unstable/errors/errors/internal_server_error.py
new file mode 100644
index 000000000..5921a86ae
--- /dev/null
+++ b/langfuse/api/unstable/errors/errors/internal_server_error.py
@@ -0,0 +1,19 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core.api_error import ApiError
+from ..types.public_api_error import PublicApiError
+
+
+class InternalServerError(ApiError):
+    """`ApiError` subclass whose HTTP status code is fixed to 500."""
+
+    def __init__(
+        self,
+        body: PublicApiError,
+        headers: typing.Optional[typing.Dict[str, str]] = None,
+    ):
+        # `body` is the structured `PublicApiError` payload; `headers` may be
+        # omitted. Both are handed to `ApiError` untouched.
+        super().__init__(body=body, headers=headers, status_code=500)
diff --git a/langfuse/api/unstable/errors/errors/method_not_allowed_error.py b/langfuse/api/unstable/errors/errors/method_not_allowed_error.py
new file mode 100644
index 000000000..547598806
--- /dev/null
+++ b/langfuse/api/unstable/errors/errors/method_not_allowed_error.py
@@ -0,0 +1,19 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core.api_error import ApiError
+from ..types.public_api_error import PublicApiError
+
+
+class MethodNotAllowedError(ApiError):
+    """`ApiError` subclass whose HTTP status code is fixed to 405."""
+
+    def __init__(
+        self,
+        body: PublicApiError,
+        headers: typing.Optional[typing.Dict[str, str]] = None,
+    ):
+        # `body` is the structured `PublicApiError` payload; `headers` may be
+        # omitted. Both are handed to `ApiError` untouched.
+        super().__init__(body=body, headers=headers, status_code=405)
diff --git a/langfuse/api/unstable/errors/errors/not_found_error.py b/langfuse/api/unstable/errors/errors/not_found_error.py
new file mode 100644
index 000000000..1b65b230e
--- /dev/null
+++ b/langfuse/api/unstable/errors/errors/not_found_error.py
@@ -0,0 +1,19 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core.api_error import ApiError
+from ..types.public_api_error import PublicApiError
+
+
+class NotFoundError(ApiError):
+    """`ApiError` subclass whose HTTP status code is fixed to 404."""
+
+    def __init__(
+        self,
+        body: PublicApiError,
+        headers: typing.Optional[typing.Dict[str, str]] = None,
+    ):
+        # `body` is the structured `PublicApiError` payload; `headers` may be
+        # omitted. Both are handed to `ApiError` untouched.
+        super().__init__(body=body, headers=headers, status_code=404)
diff --git a/langfuse/api/unstable/errors/errors/too_many_requests_error.py b/langfuse/api/unstable/errors/errors/too_many_requests_error.py
new file mode 100644
index 000000000..2a8345bc7
--- /dev/null
+++ b/langfuse/api/unstable/errors/errors/too_many_requests_error.py
@@ -0,0 +1,19 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core.api_error import ApiError
+from ..types.public_api_error import PublicApiError
+
+
+class TooManyRequestsError(ApiError):
+    """`ApiError` subclass whose HTTP status code is fixed to 429."""
+
+    def __init__(
+        self,
+        body: PublicApiError,
+        headers: typing.Optional[typing.Dict[str, str]] = None,
+    ):
+        # `body` is the structured `PublicApiError` payload; `headers` may be
+        # omitted. Both are handed to `ApiError` untouched.
+        super().__init__(body=body, headers=headers, status_code=429)
diff --git a/langfuse/api/unstable/errors/errors/unauthorized_error.py b/langfuse/api/unstable/errors/errors/unauthorized_error.py
new file mode 100644
index 000000000..84d847643
--- /dev/null
+++ b/langfuse/api/unstable/errors/errors/unauthorized_error.py
@@ -0,0 +1,19 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core.api_error import ApiError
+from ..types.public_api_error import PublicApiError
+
+
+class UnauthorizedError(ApiError):
+    """`ApiError` subclass whose HTTP status code is fixed to 401."""
+
+    def __init__(
+        self,
+        body: PublicApiError,
+        headers: typing.Optional[typing.Dict[str, str]] = None,
+    ):
+        # `body` is the structured `PublicApiError` payload; `headers` may be
+        # omitted. Both are handed to `ApiError` untouched.
+        super().__init__(body=body, headers=headers, status_code=401)
diff --git a/langfuse/api/unstable/errors/errors/unprocessable_content_error.py b/langfuse/api/unstable/errors/errors/unprocessable_content_error.py
new file mode 100644
index 000000000..a701ef9c5
--- /dev/null
+++ b/langfuse/api/unstable/errors/errors/unprocessable_content_error.py
@@ -0,0 +1,19 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core.api_error import ApiError
+from ..types.public_api_error import PublicApiError
+
+
+class UnprocessableContentError(ApiError):
+    """`ApiError` subclass whose HTTP status code is fixed to 422."""
+
+    def __init__(
+        self,
+        body: PublicApiError,
+        headers: typing.Optional[typing.Dict[str, str]] = None,
+    ):
+        # `body` is the structured `PublicApiError` payload; `headers` may be
+        # omitted. Both are handed to `ApiError` untouched.
+        super().__init__(body=body, headers=headers, status_code=422)
diff --git a/langfuse/api/unstable/errors/types/__init__.py b/langfuse/api/unstable/errors/types/__init__.py
new file mode 100644
index 000000000..fd016304e
--- /dev/null
+++ b/langfuse/api/unstable/errors/types/__init__.py
@@ -0,0 +1,53 @@
+# This file was auto-generated by Fern from our API Definition.
+
+# isort: skip_file
+
+import typing
+from importlib import import_module
+
+if typing.TYPE_CHECKING:
+    from .public_api_error import PublicApiError
+    from .public_api_error_code import PublicApiErrorCode
+    from .public_api_error_details import PublicApiErrorDetails
+    from .public_api_validation_issue import PublicApiValidationIssue
+_dynamic_imports: typing.Dict[str, str] = {  # exported name -> relative module it is imported from
+    "PublicApiError": ".public_api_error",
+    "PublicApiErrorCode": ".public_api_error_code",
+    "PublicApiErrorDetails": ".public_api_error_details",
+    "PublicApiValidationIssue": ".public_api_validation_issue",
+}
+
+
+def __getattr__(attr_name: str) -> typing.Any:
+    """Lazily import and return `attr_name` (PEP 562 module `__getattr__`)."""
+    module_name = _dynamic_imports.get(attr_name)
+    if module_name is None:
+        raise AttributeError(
+            f"No {attr_name} found in _dynamic_imports for module name -> {__name__}"
+        )
+    try:
+        loaded = import_module(module_name, __package__)
+        # An entry that points at a submodule named like the attribute exposes the
+        # module object itself; every other entry exposes a member of that module.
+        return loaded if module_name == f".{attr_name}" else getattr(loaded, attr_name)
+    except ImportError as e:
+        raise ImportError(
+            f"Failed to import {attr_name} from {module_name}: {e}"
+        ) from e
+    except AttributeError as e:
+        raise AttributeError(
+            f"Failed to get {attr_name} from {module_name}: {e}"
+        ) from e
+
+
+def __dir__():
+    # sorted() over the mapping iterates its keys, i.e. the lazily exported names.
+    return sorted(_dynamic_imports)
+
+
+__all__ = [  # names exported by a star-import; mirrors the keys of _dynamic_imports
+    "PublicApiError",
+    "PublicApiErrorCode",
+    "PublicApiErrorDetails",
+    "PublicApiValidationIssue",
+]
diff --git a/langfuse/api/unstable/errors/types/public_api_error.py b/langfuse/api/unstable/errors/types/public_api_error.py
new file mode 100644
index 000000000..5d1384e7c
--- /dev/null
+++ b/langfuse/api/unstable/errors/types/public_api_error.py
@@ -0,0 +1,58 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+from .public_api_error_code import PublicApiErrorCode
+from .public_api_error_details import PublicApiErrorDetails
+
+
+class PublicApiError(UniversalBaseModel):
+    """
+    Standard error envelope for the unstable evaluators API.
+
+    Response handling guidance:
+    - Use the HTTP status code for the broad class of failure.
+    - Use `code` for precise branching in SDKs, CLIs, or agents.
+    - Inspect `details` for field-level validation context such as invalid filter values, malformed JSONPath expressions, or missing variable mappings.
+    - Retry only after fixing the specific issue described by `code` and `details`.
+
+    Examples
+    --------
+    from langfuse.api.unstable.errors import (
+        PublicApiError,
+        PublicApiErrorCode,
+        PublicApiErrorDetails,
+    )
+
+    PublicApiError(
+        message='Filter column "type" contains unsupported value(s): INVALID',
+        code=PublicApiErrorCode.INVALID_FILTER_VALUE,
+        details=PublicApiErrorDetails(
+            field="filter[0].value",
+            column="type",
+            invalid_values=["INVALID"],
+            allowed_values=["GENERATION", "SPAN", "EVENT"],
+        ),
+    )
+    """
+
+    message: str = pydantic.Field()
+    """
+    Human-readable description of the failure.
+    """
+
+    code: PublicApiErrorCode = pydantic.Field()
+    """
+    Stable machine-readable error code.
+    """
+
+    details: typing.Optional[PublicApiErrorDetails] = pydantic.Field(default=None)
+    """
+    Optional structured error context. Inspect the populated fields based on `code`.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True  # pydantic v2: keep unknown keys; instances are immutable
+    )
diff --git a/langfuse/api/unstable/errors/types/public_api_error_code.py b/langfuse/api/unstable/errors/types/public_api_error_code.py
new file mode 100644
index 000000000..fe8f67f83
--- /dev/null
+++ b/langfuse/api/unstable/errors/types/public_api_error_code.py
@@ -0,0 +1,93 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ....core import enum
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class PublicApiErrorCode(enum.StrEnum):
+    """
+    Stable, machine-readable failure code emitted by the unstable evaluators API.
+
+    Clients (SDKs, CLIs, agents) should branch on this code instead of parsing the human-readable `message`.
+    The HTTP status conveys the broad class of error; the code pins down the specific reason.
+    """
+
+    AUTHENTICATION_FAILED = "authentication_failed"
+    ACCESS_DENIED = "access_denied"
+    INVALID_REQUEST = "invalid_request"
+    INVALID_QUERY = "invalid_query"
+    INVALID_BODY = "invalid_body"
+    INVALID_FILTER_VALUE = "invalid_filter_value"
+    INVALID_JSON_PATH = "invalid_json_path"
+    INVALID_VARIABLE_MAPPING = "invalid_variable_mapping"
+    MISSING_VARIABLE_MAPPING = "missing_variable_mapping"
+    DUPLICATE_VARIABLE_MAPPING = "duplicate_variable_mapping"
+    RESOURCE_NOT_FOUND = "resource_not_found"
+    NAME_CONFLICT = "name_conflict"
+    EVALUATOR_PREFLIGHT_FAILED = "evaluator_preflight_failed"
+    CONFLICT = "conflict"
+    UNPROCESSABLE_CONTENT = "unprocessable_content"
+    RATE_LIMITED = "rate_limited"
+    METHOD_NOT_ALLOWED = "method_not_allowed"
+    INTERNAL_ERROR = "internal_error"
+
+    def visit(
+        self,
+        authentication_failed: typing.Callable[[], T_Result],
+        access_denied: typing.Callable[[], T_Result],
+        invalid_request: typing.Callable[[], T_Result],
+        invalid_query: typing.Callable[[], T_Result],
+        invalid_body: typing.Callable[[], T_Result],
+        invalid_filter_value: typing.Callable[[], T_Result],
+        invalid_json_path: typing.Callable[[], T_Result],
+        invalid_variable_mapping: typing.Callable[[], T_Result],
+        missing_variable_mapping: typing.Callable[[], T_Result],
+        duplicate_variable_mapping: typing.Callable[[], T_Result],
+        resource_not_found: typing.Callable[[], T_Result],
+        name_conflict: typing.Callable[[], T_Result],
+        evaluator_preflight_failed: typing.Callable[[], T_Result],
+        conflict: typing.Callable[[], T_Result],
+        unprocessable_content: typing.Callable[[], T_Result],
+        rate_limited: typing.Callable[[], T_Result],
+        method_not_allowed: typing.Callable[[], T_Result],
+        internal_error: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        """
+        Invoke the callback that corresponds to this error code.
+
+        Only the callable named after this member's value is invoked.
+
+        Parameters
+        ----------
+        authentication_failed, ..., internal_error : typing.Callable[[], T_Result]
+            Zero-argument handlers, one per enum member; all are required.
+
+        Returns
+        -------
+        T_Result
+            The return value of the selected handler.
+        """
+        handlers = {
+            PublicApiErrorCode.AUTHENTICATION_FAILED: authentication_failed,
+            PublicApiErrorCode.ACCESS_DENIED: access_denied,
+            PublicApiErrorCode.INVALID_REQUEST: invalid_request,
+            PublicApiErrorCode.INVALID_QUERY: invalid_query,
+            PublicApiErrorCode.INVALID_BODY: invalid_body,
+            PublicApiErrorCode.INVALID_FILTER_VALUE: invalid_filter_value,
+            PublicApiErrorCode.INVALID_JSON_PATH: invalid_json_path,
+            PublicApiErrorCode.INVALID_VARIABLE_MAPPING: invalid_variable_mapping,
+            PublicApiErrorCode.MISSING_VARIABLE_MAPPING: missing_variable_mapping,
+            PublicApiErrorCode.DUPLICATE_VARIABLE_MAPPING: duplicate_variable_mapping,
+            PublicApiErrorCode.RESOURCE_NOT_FOUND: resource_not_found,
+            PublicApiErrorCode.NAME_CONFLICT: name_conflict,
+            PublicApiErrorCode.EVALUATOR_PREFLIGHT_FAILED: evaluator_preflight_failed,
+            PublicApiErrorCode.CONFLICT: conflict,
+            PublicApiErrorCode.UNPROCESSABLE_CONTENT: unprocessable_content,
+            PublicApiErrorCode.RATE_LIMITED: rate_limited,
+            PublicApiErrorCode.METHOD_NOT_ALLOWED: method_not_allowed,
+            PublicApiErrorCode.INTERNAL_ERROR: internal_error,
+        }
+        return handlers[self]()
diff --git a/langfuse/api/unstable/errors/types/public_api_error_details.py b/langfuse/api/unstable/errors/types/public_api_error_details.py
new file mode 100644
index 000000000..803378164
--- /dev/null
+++ b/langfuse/api/unstable/errors/types/public_api_error_details.py
@@ -0,0 +1,114 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+import typing_extensions
+from ....core.pydantic_utilities import UniversalBaseModel
+from ....core.serialization import FieldMetadata
+from .public_api_validation_issue import PublicApiValidationIssue
+
+
+class PublicApiErrorDetails(UniversalBaseModel):
+    """
+    Optional structured context attached to an unstable-evals error.
+
+    The populated fields depend on the error `code` (listed by wire name; the Python attributes are snake_case, e.g. `invalid_values`):
+    - request parsing failures populate `issues`
+    - filter validation failures populate `field`, `column`, `invalidValues`, and `allowedValues`
+    - variable mapping failures populate `field`, `variable`, or `variables`
+    - JSONPath validation failures populate `field`, `variable`, and `value`
+    - evaluator preflight failures populate `evaluatorName`, `provider`, and `model`
+    - rate limiting populates `retryAfterSeconds`, `limit`, `remaining`, and `resetAt`
+    """
+
+    issues: typing.Optional[typing.List[PublicApiValidationIssue]] = pydantic.Field(
+        default=None
+    )
+    """
+    Validation issues for malformed request bodies or query parameters.
+    """
+
+    field: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Path-like reference to the failing field, for example `mapping[1].jsonPath`.
+    """
+
+    column: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Filter column that failed validation.
+    """
+
+    invalid_values: typing_extensions.Annotated[
+        typing.Optional[typing.List[str]], FieldMetadata(alias="invalidValues")
+    ] = pydantic.Field(default=None)
+    """
+    Unsupported values supplied by the caller.
+    """
+
+    allowed_values: typing_extensions.Annotated[
+        typing.Optional[typing.List[str]], FieldMetadata(alias="allowedValues")
+    ] = pydantic.Field(default=None)
+    """
+    Allowed values for the failing filter column.
+    """
+
+    variable: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Evaluator variable involved in the failure.
+    """
+
+    variables: typing.Optional[typing.List[str]] = pydantic.Field(default=None)
+    """
+    Multiple evaluator variables involved in the failure, for example missing mappings.
+    """
+
+    value: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Raw invalid value supplied by the caller.
+    """
+
+    evaluator_name: typing_extensions.Annotated[
+        typing.Optional[str], FieldMetadata(alias="evaluatorName")
+    ] = pydantic.Field(default=None)
+    """
+    Evaluator name used during preflight validation.
+    """
+
+    provider: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Provider resolved during evaluator preflight, if any.
+    """
+
+    model: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Model resolved during evaluator preflight, if any.
+    """
+
+    retry_after_seconds: typing_extensions.Annotated[
+        typing.Optional[int], FieldMetadata(alias="retryAfterSeconds")
+    ] = pydantic.Field(default=None)
+    """
+    Suggested retry delay for rate-limited requests.
+    """
+
+    limit: typing.Optional[int] = pydantic.Field(default=None)
+    """
+    Numeric limit associated with the failure, for example the active evaluation-rule cap or the current rate-limit window.
+    """
+
+    remaining: typing.Optional[int] = pydantic.Field(default=None)
+    """
+    Remaining requests in the current rate-limit window.
+    """
+
+    reset_at: typing_extensions.Annotated[
+        typing.Optional[str], FieldMetadata(alias="resetAt")
+    ] = pydantic.Field(default=None)
+    """
+    ISO-8601 timestamp when the current rate-limit window resets.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True  # pydantic v2: keep unknown keys; instances are immutable
+    )
diff --git a/langfuse/api/unstable/errors/types/public_api_validation_issue.py b/langfuse/api/unstable/errors/types/public_api_validation_issue.py
new file mode 100644
index 000000000..877d0376a
--- /dev/null
+++ b/langfuse/api/unstable/errors/types/public_api_validation_issue.py
@@ -0,0 +1,34 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+
+
+class PublicApiValidationIssue(UniversalBaseModel):
+    """
+    One validation issue returned for malformed request bodies or query parameters.
+
+    This mirrors the most important parts of a Zod issue: a machine-readable `code`,
+    a human-readable `message`, and a structured `path`.
+    """
+
+    code: str = pydantic.Field()
+    """
+    Machine-readable validation issue code emitted by the server validator.
+    """
+
+    message: str = pydantic.Field()
+    """
+    Human-readable explanation of the validation failure.
+    """
+
+    path: typing.List[typing.Any] = pydantic.Field()  # elements are untyped; the example below mixes str and int
+    """
+    Path to the invalid field, for example `["mapping", 0, "jsonPath"]`.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True  # pydantic v2: keep unknown keys; instances are immutable
+    )
diff --git a/langfuse/api/unstable/evaluation_rules/__init__.py b/langfuse/api/unstable/evaluation_rules/__init__.py
new file mode 100644
index 000000000..f0c007231
--- /dev/null
+++ b/langfuse/api/unstable/evaluation_rules/__init__.py
@@ -0,0 +1,64 @@
+# This file was auto-generated by Fern from our API Definition.
+
+# isort: skip_file
+
+import typing
+from importlib import import_module
+
+if typing.TYPE_CHECKING:
+    from .types import (
+        CreateEvaluationRuleRequest,
+        DeleteEvaluationRuleResponse,
+        EvaluationRule,
+        EvaluationRuleEvaluator,
+        EvaluationRuleEvaluatorReference,
+        EvaluationRules,
+        UpdateEvaluationRuleRequest,
+    )
+_dynamic_imports: typing.Dict[str, str] = {  # exported name -> relative module it is imported from
+    "CreateEvaluationRuleRequest": ".types",
+    "DeleteEvaluationRuleResponse": ".types",
+    "EvaluationRule": ".types",
+    "EvaluationRuleEvaluator": ".types",
+    "EvaluationRuleEvaluatorReference": ".types",
+    "EvaluationRules": ".types",
+    "UpdateEvaluationRuleRequest": ".types",
+}
+
+
+def __getattr__(attr_name: str) -> typing.Any:
+    """Lazily import and return `attr_name` (PEP 562 module `__getattr__`)."""
+    module_name = _dynamic_imports.get(attr_name)
+    if module_name is None:
+        raise AttributeError(
+            f"No {attr_name} found in _dynamic_imports for module name -> {__name__}"
+        )
+    try:
+        loaded = import_module(module_name, __package__)
+        # An entry that points at a submodule named like the attribute exposes the
+        # module object itself; every other entry exposes a member of that module.
+        return loaded if module_name == f".{attr_name}" else getattr(loaded, attr_name)
+    except ImportError as e:
+        raise ImportError(
+            f"Failed to import {attr_name} from {module_name}: {e}"
+        ) from e
+    except AttributeError as e:
+        raise AttributeError(
+            f"Failed to get {attr_name} from {module_name}: {e}"
+        ) from e
+
+
+def __dir__():
+    # sorted() over the mapping iterates its keys, i.e. the lazily exported names.
+    return sorted(_dynamic_imports)
+
+
+__all__ = [  # names exported by a star-import; mirrors the keys of _dynamic_imports
+    "CreateEvaluationRuleRequest",
+    "DeleteEvaluationRuleResponse",
+    "EvaluationRule",
+    "EvaluationRuleEvaluator",
+    "EvaluationRuleEvaluatorReference",
+    "EvaluationRules",
+    "UpdateEvaluationRuleRequest",
+]
diff --git a/langfuse/api/unstable/evaluation_rules/client.py b/langfuse/api/unstable/evaluation_rules/client.py
new file mode 100644
index 000000000..20e56e6c3
--- /dev/null
+++ b/langfuse/api/unstable/evaluation_rules/client.py
@@ -0,0 +1,859 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
+from ...core.request_options import RequestOptions
+from ..commons.types.evaluation_rule_filter import EvaluationRuleFilter
+from ..commons.types.evaluation_rule_mapping import EvaluationRuleMapping
+from ..commons.types.evaluation_rule_target import EvaluationRuleTarget
+from .raw_client import AsyncRawEvaluationRulesClient, RawEvaluationRulesClient
+from .types.delete_evaluation_rule_response import DeleteEvaluationRuleResponse
+from .types.evaluation_rule import EvaluationRule
+from .types.evaluation_rule_evaluator_reference import EvaluationRuleEvaluatorReference
+from .types.evaluation_rules import EvaluationRules
+
+# Sentinel (Ellipsis typed as Any) used as the default for optional parameters the caller did not pass.
+OMIT = typing.cast(typing.Any, ...)
+
+
+class EvaluationRulesClient:  # synchronous client: each method delegates to the raw client and returns the response's `.data`
+    def __init__(self, *, client_wrapper: SyncClientWrapper):
+        self._raw_client = RawEvaluationRulesClient(client_wrapper=client_wrapper)  # all endpoint calls go through this
+
+    @property
+    def with_raw_response(self) -> RawEvaluationRulesClient:
+        """
+        Retrieves a raw implementation of this client that returns raw responses.
+
+        Returns
+        -------
+        RawEvaluationRulesClient
+        """
+        return self._raw_client
+
+    def create(
+        self,
+        *,
+        name: str,
+        evaluator: EvaluationRuleEvaluatorReference,
+        target: EvaluationRuleTarget,
+        enabled: bool,
+        mapping: typing.Sequence[EvaluationRuleMapping],
+        sampling: typing.Optional[float] = OMIT,
+        filter: typing.Optional[typing.Sequence[EvaluationRuleFilter]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> EvaluationRule:
+        """
+        Create an evaluation rule.
+
+        An evaluation rule defines **what** incoming data should be evaluated and **how prompt variables should be populated** from that data.
+
+        Use this resource after choosing an evaluator from the evaluator endpoints.
+
+        Key rules:
+        - `name` must be unique within the project for public evaluation rules
+        - `target` must be `observation` or `experiment`
+        - `evaluator.name` + `evaluator.scope` must identify an existing evaluator family returned by the evaluator endpoints
+        - Langfuse resolves that family to its latest version before saving the evaluation rule
+        - for `target=experiment`, use dataset `id` values from `GET /api/public/v2/datasets` when filtering by `datasetId`
+        - every evaluator prompt variable must be mapped exactly once
+        - `expected_output` and `experiment_item_metadata` mappings are only valid for `target=experiment`
+        - if `enabled=true`, Langfuse validates that the referenced evaluator can currently run
+        - at most 50 evaluation rules can be effectively active in one project at the same time
+
+        If an evaluation rule with the same `name` already exists in the project, the API returns `409`.
+        In that case, update the existing resource with `PATCH /api/public/unstable/evaluation-rules/{evaluationRuleId}` instead of creating a second one.
+
+        If enabling this resource would exceed the 50-active limit, the API also returns `409`.
+        In that case, disable or pause another active evaluation rule before enabling a new one.
+
+        Current scope:
+        - evaluation rules are live-ingestion rules only
+        - they do not trigger historical backfills
+
+        Recovery guidance:
+        - `400 invalid_filter_value`: fix the filter `column` or `value` using `details.column`, `details.invalidValues`, and `details.allowedValues`
+        - `400 invalid_filter_value` with `details.column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response
+        - `400 missing_variable_mapping`: fetch the evaluator again and make sure every variable in `variables` appears exactly once in `mapping`
+        - `400 duplicate_variable_mapping`: remove repeated mappings for the same variable
+        - `400 invalid_variable_mapping`: switch to a valid `source` for the selected `target`, or fix the variable name
+        - `400 invalid_json_path`: remove or correct the `jsonPath`
+        - `422 evaluator_preflight_failed`: the selected evaluator cannot run with the resolved model configuration. Fix the evaluator/default model setup, then retry the create request.
+
+        Parameters
+        ----------
+        name : str
+            Human-readable name of the evaluation rule.
+
+        evaluator : EvaluationRuleEvaluatorReference
+            Evaluator family to use.
+
+            Use `name` and `scope` from the evaluator endpoints.
+            Langfuse resolves that family to its latest version before saving the rule.
+
+        target : EvaluationRuleTarget
+            Target object type to evaluate.
+
+        enabled : bool
+            Whether the evaluation rule should be active immediately after creation.
+
+        mapping : typing.Sequence[EvaluationRuleMapping]
+            Required variable mappings.
+
+            Every evaluator variable must appear exactly once.
+            Build this list from the evaluator `variables` array returned by the evaluator endpoints.
+
+        sampling : typing.Optional[float]
+            Optional sampling fraction. Defaults to `1`.
+
+        filter : typing.Optional[typing.Sequence[EvaluationRuleFilter]]
+            Optional filter list.
+
+            Omit or pass an empty list to evaluate all matching targets for the selected `target`.
+            Each filter object must use a column that is valid for that `target`.
+            For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        EvaluationRule
+
+        Examples
+        --------
+        from langfuse import LangfuseAPI
+        from langfuse.api.unstable.commons import (
+            EvaluationRuleFilter_StringOptions,
+            EvaluationRuleMapping,
+            EvaluationRuleMappingSource,
+            EvaluationRuleOptionsFilterOperator,
+            EvaluationRuleTarget,
+            EvaluatorScope,
+        )
+        from langfuse.api.unstable.evaluation_rules import EvaluationRuleEvaluatorReference
+
+        client = LangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+        client.unstable.evaluation_rules.create(
+            name="answer-correctness-live",
+            evaluator=EvaluationRuleEvaluatorReference(
+                name="answer-correctness",
+                scope=EvaluatorScope.PROJECT,
+            ),
+            target=EvaluationRuleTarget.OBSERVATION,
+            enabled=True,
+            sampling=1.0,
+            filter=[
+                EvaluationRuleFilter_StringOptions(
+                    column="type",
+                    operator=EvaluationRuleOptionsFilterOperator.ANY_OF,
+                    value=["GENERATION"],
+                )
+            ],
+            mapping=[
+                EvaluationRuleMapping(
+                    variable="input",
+                    source=EvaluationRuleMappingSource.INPUT,
+                ),
+                EvaluationRuleMapping(
+                    variable="output",
+                    source=EvaluationRuleMappingSource.OUTPUT,
+                ),
+            ],
+        )
+        """
+        _response = self._raw_client.create(
+            name=name,
+            evaluator=evaluator,
+            target=target,
+            enabled=enabled,
+            mapping=mapping,
+            sampling=sampling,
+            filter=filter,
+            request_options=request_options,
+        )
+        return _response.data
+
+    def list(
+        self,
+        *,
+        page: typing.Optional[int] = None,
+        limit: typing.Optional[int] = None,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> EvaluationRules:
+        """
+        List evaluation rules in the authenticated project.
+
+        Each item describes one live evaluation rule and its effective runtime status.
+
+        Parameters
+        ----------
+        page : typing.Optional[int]
+            1-based page number. Defaults to `1`.
+
+        limit : typing.Optional[int]
+            Maximum number of items per page. Defaults to `50`.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        EvaluationRules
+
+        Examples
+        --------
+        from langfuse import LangfuseAPI
+
+        client = LangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+        client.unstable.evaluation_rules.list()
+        """
+        _response = self._raw_client.list(
+            page=page, limit=limit, request_options=request_options
+        )
+        return _response.data
+
+    def get(
+        self,
+        evaluation_rule_id: str,
+        *,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> EvaluationRule:
+        """
+        Get one evaluation rule by its identifier.
+
+        Use this endpoint to inspect the current evaluator, target, mapping, filters, and effective runtime status.
+
+        Parameters
+        ----------
+        evaluation_rule_id : str
+            Evaluation rule identifier returned by the evaluation rule endpoints.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        EvaluationRule
+
+        Examples
+        --------
+        from langfuse import LangfuseAPI
+
+        client = LangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+        client.unstable.evaluation_rules.get(
+            evaluation_rule_id="evaluationRuleId",
+        )
+        """
+        _response = self._raw_client.get(
+            evaluation_rule_id, request_options=request_options
+        )
+        return _response.data
+
+    def update(
+        self,
+        evaluation_rule_id: str,
+        *,
+        name: typing.Optional[str] = OMIT,
+        evaluator: typing.Optional[EvaluationRuleEvaluatorReference] = OMIT,
+        target: typing.Optional[EvaluationRuleTarget] = OMIT,
+        enabled: typing.Optional[bool] = OMIT,
+        sampling: typing.Optional[float] = OMIT,
+        filter: typing.Optional[typing.Sequence[EvaluationRuleFilter]] = OMIT,
+        mapping: typing.Optional[typing.Sequence[EvaluationRuleMapping]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> EvaluationRule:
+        """
+        Update an evaluation rule.
+
+        Typical uses:
+        - enable or disable live execution
+        - switch to another evaluator
+        - adjust sampling
+        - change filters
+        - update variable mappings
+
+        Important behavior:
+        - provide only the fields you want to change
+        - if you provide `evaluator`, Langfuse resolves that evaluator family to its latest version before saving
+        - changing `target`, `filter`, or `mapping` must still produce a valid target-specific configuration
+        - if you change `target`, also send a compatible `filter` and `mapping` in the same request unless the existing ones are still valid for the new target
+        - if the resulting config is enabled, Langfuse re-validates that the selected evaluator can run
+        - if the update would move a non-active evaluation rule into the active state and the project already has 50 active evaluation rules, the API returns `409`
+
+        Recovery guidance:
+        - if the update fails with `missing_variable_mapping` or `invalid_variable_mapping` after changing `evaluator` or `target`, resend the request with a complete new `mapping`
+        - if the update fails with `invalid_filter_value` after changing `target`, resend the request with a target-compatible `filter`
+
+        Parameters
+        ----------
+        evaluation_rule_id : str
+            Evaluation rule identifier.
+
+        name : typing.Optional[str]
+            Updated evaluation rule name.
+
+        evaluator : typing.Optional[EvaluationRuleEvaluatorReference]
+            Updated evaluator family.
+
+            Langfuse resolves the provided evaluator family to its latest version before saving the rule.
+
+        target : typing.Optional[EvaluationRuleTarget]
+            Updated target object type.
+
+        enabled : typing.Optional[bool]
+            Updated desired enabled state.
+
+        sampling : typing.Optional[float]
+            Updated sampling fraction.
+
+        filter : typing.Optional[typing.Sequence[EvaluationRuleFilter]]
+            Updated filter list.
+
+            For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+        mapping : typing.Optional[typing.Sequence[EvaluationRuleMapping]]
+            Updated variable mappings.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        EvaluationRule
+
+        Examples
+        --------
+        from langfuse import LangfuseAPI
+
+        client = LangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+        client.unstable.evaluation_rules.update(
+            evaluation_rule_id="evaluationRuleId",
+        )
+        """
+        _response = self._raw_client.update(
+            evaluation_rule_id,
+            name=name,
+            evaluator=evaluator,
+            target=target,
+            enabled=enabled,
+            sampling=sampling,
+            filter=filter,
+            mapping=mapping,
+            request_options=request_options,
+        )
+        return _response.data
+
+    def delete(
+        self,
+        evaluation_rule_id: str,
+        *,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> DeleteEvaluationRuleResponse:
+        """
+        Delete an evaluation rule.
+
+        This removes the live-ingestion rule only. It does not delete the referenced evaluator.
+
+        Parameters
+        ----------
+        evaluation_rule_id : str
+            Evaluation rule identifier.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        DeleteEvaluationRuleResponse
+
+        Examples
+        --------
+        from langfuse import LangfuseAPI
+
+        client = LangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+        client.unstable.evaluation_rules.delete(
+            evaluation_rule_id="evaluationRuleId",
+        )
+        """
+        _response = self._raw_client.delete(
+            evaluation_rule_id, request_options=request_options
+        )
+        return _response.data
+
+
+class AsyncEvaluationRulesClient:  # async client: each method awaits the raw client and returns the response's `.data`
+    def __init__(self, *, client_wrapper: AsyncClientWrapper):
+        self._raw_client = AsyncRawEvaluationRulesClient(client_wrapper=client_wrapper)  # all endpoint calls go through this
+
+    @property
+    def with_raw_response(self) -> AsyncRawEvaluationRulesClient:
+        """
+        Retrieves a raw implementation of this client that returns raw responses.
+
+        Returns
+        -------
+        AsyncRawEvaluationRulesClient
+        """
+        return self._raw_client
+
+    async def create(
+        self,
+        *,
+        name: str,
+        evaluator: EvaluationRuleEvaluatorReference,
+        target: EvaluationRuleTarget,
+        enabled: bool,
+        mapping: typing.Sequence[EvaluationRuleMapping],
+        sampling: typing.Optional[float] = OMIT,
+        filter: typing.Optional[typing.Sequence[EvaluationRuleFilter]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> EvaluationRule:
+        """
+        Create an evaluation rule.
+
+        An evaluation rule defines **what** incoming data should be evaluated and **how prompt variables should be populated** from that data.
+
+        Use this resource after choosing an evaluator from the evaluator endpoints.
+
+        Key rules:
+        - `name` must be unique within the project for public evaluation rules
+        - `target` must be `observation` or `experiment`
+        - `evaluator.name` + `evaluator.scope` must identify an existing evaluator family returned by the evaluator endpoints
+        - Langfuse resolves that family to its latest version before saving the evaluation rule
+        - for `target=experiment`, use dataset `id` values from `GET /api/public/v2/datasets` when filtering by `datasetId`
+        - every evaluator prompt variable must be mapped exactly once
+        - `expected_output` and `experiment_item_metadata` mappings are only valid for `target=experiment`
+        - if `enabled=true`, Langfuse validates that the referenced evaluator can currently run
+        - at most 50 evaluation rules can be effectively active in one project at the same time
+
+        If an evaluation rule with the same `name` already exists in the project, the API returns `409`.
+        In that case, update the existing resource with `PATCH /api/public/unstable/evaluation-rules/{evaluationRuleId}` instead of creating a second one.
+
+        If enabling this resource would exceed the 50-active limit, the API also returns `409`.
+        In that case, disable or pause another active evaluation rule before enabling a new one.
+
+        Current scope:
+        - evaluation rules are live-ingestion rules only
+        - they do not trigger historical backfills
+
+        Recovery guidance:
+        - `400 invalid_filter_value`: fix the filter `column` or `value` using `details.column`, `details.invalidValues`, and `details.allowedValues`
+        - `400 invalid_filter_value` with `details.column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response
+        - `400 missing_variable_mapping`: fetch the evaluator again and make sure every variable in `variables` appears exactly once in `mapping`
+        - `400 duplicate_variable_mapping`: remove repeated mappings for the same variable
+        - `400 invalid_variable_mapping`: switch to a valid `source` for the selected `target`, or fix the variable name
+        - `400 invalid_json_path`: remove or correct the `jsonPath`
+        - `422 evaluator_preflight_failed`: the selected evaluator cannot run with the resolved model configuration. Fix the evaluator/default model setup, then retry the create request.
+
+        Parameters
+        ----------
+        name : str
+            Human-readable name of the evaluation rule.
+
+        evaluator : EvaluationRuleEvaluatorReference
+            Evaluator family to use.
+
+            Use `name` and `scope` from the evaluator endpoints.
+            Langfuse resolves that family to its latest version before saving the rule.
+
+        target : EvaluationRuleTarget
+            Target object type to evaluate.
+
+        enabled : bool
+            Whether the evaluation rule should be active immediately after creation.
+
+        mapping : typing.Sequence[EvaluationRuleMapping]
+            Required variable mappings.
+
+            Every evaluator variable must appear exactly once.
+            Build this list from the evaluator `variables` array returned by the evaluator endpoints.
+
+        sampling : typing.Optional[float]
+            Optional sampling fraction. Defaults to `1`.
+
+        filter : typing.Optional[typing.Sequence[EvaluationRuleFilter]]
+            Optional filter list.
+
+            Omit or pass an empty list to evaluate all matching targets for the selected `target`.
+            Each filter object must use a column that is valid for that `target`.
+            For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        EvaluationRule
+
+        Examples
+        --------
+        import asyncio
+
+        from langfuse import AsyncLangfuseAPI
+        from langfuse.api.unstable.commons import (
+            EvaluationRuleFilter_StringOptions,
+            EvaluationRuleMapping,
+            EvaluationRuleMappingSource,
+            EvaluationRuleOptionsFilterOperator,
+            EvaluationRuleTarget,
+            EvaluatorScope,
+        )
+        from langfuse.api.unstable.evaluation_rules import EvaluationRuleEvaluatorReference
+
+        client = AsyncLangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+
+
+        async def main() -> None:
+            await client.unstable.evaluation_rules.create(
+                name="answer-correctness-live",
+                evaluator=EvaluationRuleEvaluatorReference(
+                    name="answer-correctness",
+                    scope=EvaluatorScope.PROJECT,
+                ),
+                target=EvaluationRuleTarget.OBSERVATION,
+                enabled=True,
+                sampling=1.0,
+                filter=[
+                    EvaluationRuleFilter_StringOptions(
+                        column="type",
+                        operator=EvaluationRuleOptionsFilterOperator.ANY_OF,
+                        value=["GENERATION"],
+                    )
+                ],
+                mapping=[
+                    EvaluationRuleMapping(
+                        variable="input",
+                        source=EvaluationRuleMappingSource.INPUT,
+                    ),
+                    EvaluationRuleMapping(
+                        variable="output",
+                        source=EvaluationRuleMappingSource.OUTPUT,
+                    ),
+                ],
+            )
+
+
+        asyncio.run(main())
+        """
+        _response = await self._raw_client.create(
+            name=name,
+            evaluator=evaluator,
+            target=target,
+            enabled=enabled,
+            mapping=mapping,
+            sampling=sampling,
+            filter=filter,
+            request_options=request_options,
+        )
+        return _response.data
+
+    async def list(
+        self,
+        *,
+        page: typing.Optional[int] = None,
+        limit: typing.Optional[int] = None,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> EvaluationRules:
+        """
+        List evaluation rules in the authenticated project.
+
+        Each item describes one live evaluation rule and its effective runtime status.
+
+        Parameters
+        ----------
+        page : typing.Optional[int]
+            1-based page number. Defaults to `1`.
+
+        limit : typing.Optional[int]
+            Maximum number of items per page. Defaults to `50`.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        EvaluationRules
+
+        Examples
+        --------
+        import asyncio
+
+        from langfuse import AsyncLangfuseAPI
+
+        client = AsyncLangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+
+
+        async def main() -> None:
+            await client.unstable.evaluation_rules.list()
+
+
+        asyncio.run(main())
+        """
+        _response = await self._raw_client.list(
+            page=page, limit=limit, request_options=request_options
+        )
+        return _response.data
+
+    async def get(
+        self,
+        evaluation_rule_id: str,
+        *,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> EvaluationRule:
+        """
+        Get one evaluation rule by its identifier.
+
+        Use this endpoint to inspect the current evaluator, target, mapping, filters, and effective runtime status.
+
+        Parameters
+        ----------
+        evaluation_rule_id : str
+            Evaluation rule identifier returned by the evaluation rule endpoints.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        EvaluationRule
+
+        Examples
+        --------
+        import asyncio
+
+        from langfuse import AsyncLangfuseAPI
+
+        client = AsyncLangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+
+
+        async def main() -> None:
+            await client.unstable.evaluation_rules.get(
+                evaluation_rule_id="evaluationRuleId",
+            )
+
+
+        asyncio.run(main())
+        """
+        _response = await self._raw_client.get(
+            evaluation_rule_id, request_options=request_options
+        )
+        return _response.data
+
+    async def update(
+        self,
+        evaluation_rule_id: str,
+        *,
+        name: typing.Optional[str] = OMIT,
+        evaluator: typing.Optional[EvaluationRuleEvaluatorReference] = OMIT,
+        target: typing.Optional[EvaluationRuleTarget] = OMIT,
+        enabled: typing.Optional[bool] = OMIT,
+        sampling: typing.Optional[float] = OMIT,
+        filter: typing.Optional[typing.Sequence[EvaluationRuleFilter]] = OMIT,
+        mapping: typing.Optional[typing.Sequence[EvaluationRuleMapping]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> EvaluationRule:
+        """
+        Update an evaluation rule.
+
+        Typical uses:
+        - enable or disable live execution
+        - switch to another evaluator
+        - adjust sampling
+        - change filters
+        - update variable mappings
+
+        Important behavior:
+        - provide only the fields you want to change
+        - if you provide `evaluator`, Langfuse resolves that evaluator family to its latest version before saving
+        - changing `target`, `filter`, or `mapping` must still produce a valid target-specific configuration
+        - if you change `target`, also send a compatible `filter` and `mapping` in the same request unless the existing ones are still valid for the new target
+        - if the resulting config is enabled, Langfuse re-validates that the selected evaluator can run
+        - if the update would move a non-active evaluation rule into the active state and the project already has 50 active evaluation rules, the API returns `409`
+
+        Recovery guidance:
+        - if the update fails with `missing_variable_mapping` or `invalid_variable_mapping` after changing `evaluator` or `target`, resend the request with a complete new `mapping`
+        - if the update fails with `invalid_filter_value` after changing `target`, resend the request with a target-compatible `filter`
+
+        Parameters
+        ----------
+        evaluation_rule_id : str
+            Evaluation rule identifier.
+
+        name : typing.Optional[str]
+            Updated evaluation rule name.
+
+        evaluator : typing.Optional[EvaluationRuleEvaluatorReference]
+            Updated evaluator family.
+
+            Langfuse resolves the provided evaluator family to its latest version before saving the rule.
+
+        target : typing.Optional[EvaluationRuleTarget]
+            Updated target object type.
+
+        enabled : typing.Optional[bool]
+            Updated desired enabled state.
+
+        sampling : typing.Optional[float]
+            Updated sampling fraction.
+
+        filter : typing.Optional[typing.Sequence[EvaluationRuleFilter]]
+            Updated filter list.
+
+            For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+        mapping : typing.Optional[typing.Sequence[EvaluationRuleMapping]]
+            Updated variable mappings.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        EvaluationRule
+
+        Examples
+        --------
+        import asyncio
+
+        from langfuse import AsyncLangfuseAPI
+
+        client = AsyncLangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+
+
+        async def main() -> None:
+            await client.unstable.evaluation_rules.update(
+                evaluation_rule_id="evaluationRuleId",
+            )
+
+
+        asyncio.run(main())
+        """
+        _response = await self._raw_client.update(
+            evaluation_rule_id,
+            name=name,
+            evaluator=evaluator,
+            target=target,
+            enabled=enabled,
+            sampling=sampling,
+            filter=filter,
+            mapping=mapping,
+            request_options=request_options,
+        )
+        return _response.data
+
+    async def delete(
+        self,
+        evaluation_rule_id: str,
+        *,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> DeleteEvaluationRuleResponse:
+        """
+        Delete an evaluation rule.
+
+        This removes the live-ingestion rule only. It does not delete the referenced evaluator.
+
+        Parameters
+        ----------
+        evaluation_rule_id : str
+            Evaluation rule identifier.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        DeleteEvaluationRuleResponse
+
+        Examples
+        --------
+        import asyncio
+
+        from langfuse import AsyncLangfuseAPI
+
+        client = AsyncLangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+
+
+        async def main() -> None:
+            await client.unstable.evaluation_rules.delete(
+                evaluation_rule_id="evaluationRuleId",
+            )
+
+
+        asyncio.run(main())
+        """
+        _response = await self._raw_client.delete(
+            evaluation_rule_id, request_options=request_options
+        )
+        return _response.data
diff --git a/langfuse/api/unstable/evaluation_rules/raw_client.py b/langfuse/api/unstable/evaluation_rules/raw_client.py
new file mode 100644
index 000000000..f99aba663
--- /dev/null
+++ b/langfuse/api/unstable/evaluation_rules/raw_client.py
@@ -0,0 +1,2271 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+from json.decoder import JSONDecodeError
+
+from ...commons.errors.access_denied_error import (
+    AccessDeniedError as commons_errors_access_denied_error_AccessDeniedError,
+)
+from ...commons.errors.error import Error
+from ...commons.errors.method_not_allowed_error import (
+    MethodNotAllowedError as commons_errors_method_not_allowed_error_MethodNotAllowedError,
+)
+from ...commons.errors.not_found_error import (
+    NotFoundError as commons_errors_not_found_error_NotFoundError,
+)
+from ...commons.errors.unauthorized_error import (
+    UnauthorizedError as commons_errors_unauthorized_error_UnauthorizedError,
+)
+from ...core.api_error import ApiError
+from ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
+from ...core.http_response import AsyncHttpResponse, HttpResponse
+from ...core.jsonable_encoder import jsonable_encoder
+from ...core.pydantic_utilities import parse_obj_as
+from ...core.request_options import RequestOptions
+from ...core.serialization import convert_and_respect_annotation_metadata
+from ..commons.types.evaluation_rule_filter import EvaluationRuleFilter
+from ..commons.types.evaluation_rule_mapping import EvaluationRuleMapping
+from ..commons.types.evaluation_rule_target import EvaluationRuleTarget
+from ..errors.errors.access_denied_error import (
+    AccessDeniedError as unstable_errors_errors_access_denied_error_AccessDeniedError,
+)
+from ..errors.errors.bad_request_error import BadRequestError
+from ..errors.errors.conflict_error import ConflictError
+from ..errors.errors.internal_server_error import InternalServerError
+from ..errors.errors.method_not_allowed_error import (
+    MethodNotAllowedError as unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError,
+)
+from ..errors.errors.not_found_error import (
+    NotFoundError as unstable_errors_errors_not_found_error_NotFoundError,
+)
+from ..errors.errors.too_many_requests_error import TooManyRequestsError
+from ..errors.errors.unauthorized_error import (
+    UnauthorizedError as unstable_errors_errors_unauthorized_error_UnauthorizedError,
+)
+from ..errors.errors.unprocessable_content_error import UnprocessableContentError
+from ..errors.types.public_api_error import PublicApiError
+from .types.delete_evaluation_rule_response import DeleteEvaluationRuleResponse
+from .types.evaluation_rule import EvaluationRule
+from .types.evaluation_rule_evaluator_reference import EvaluationRuleEvaluatorReference
+from .types.evaluation_rules import EvaluationRules
+
+# Sentinel (Ellipsis typed as Any) used as the default for optional request parameters and passed as `omit=` on requests.
+OMIT = typing.cast(typing.Any, ...)
+
+
+class RawEvaluationRulesClient:
+    def __init__(self, *, client_wrapper: SyncClientWrapper):
+        self._client_wrapper = client_wrapper  # request methods issue HTTP calls through its `.httpx_client`
+
+    def create(
+        self,
+        *,
+        name: str,
+        evaluator: EvaluationRuleEvaluatorReference,
+        target: EvaluationRuleTarget,
+        enabled: bool,
+        mapping: typing.Sequence[EvaluationRuleMapping],
+        sampling: typing.Optional[float] = OMIT,
+        filter: typing.Optional[typing.Sequence[EvaluationRuleFilter]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> HttpResponse[EvaluationRule]:
+        """
+        Create an evaluation rule.
+
+        An evaluation rule defines **what** incoming data should be evaluated and **how prompt variables should be populated** from that data.
+
+        Use this resource after choosing an evaluator from the evaluator endpoints.
+
+        Key rules:
+        - `name` must be unique within the project for public evaluation rules
+        - `target` must be `observation` or `experiment`
+        - `evaluator.name` + `evaluator.scope` must identify an existing evaluator family returned by the evaluator endpoints
+        - Langfuse resolves that family to its latest version before saving the evaluation rule
+        - for `target=experiment`, use dataset `id` values from `GET /api/public/v2/datasets` when filtering by `datasetId`
+        - every evaluator prompt variable must be mapped exactly once
+        - `expected_output` and `experiment_item_metadata` mappings are only valid for `target=experiment`
+        - if `enabled=true`, Langfuse validates that the referenced evaluator can currently run
+        - at most 50 evaluation rules can be effectively active in one project at the same time
+
+        If an evaluation rule with the same `name` already exists in the project, the API returns `409`.
+        In that case, update the existing resource with `PATCH /api/public/unstable/evaluation-rules/{evaluationRuleId}` instead of creating a second one.
+
+        If enabling this resource would exceed the 50-active limit, the API also returns `409`.
+        In that case, disable or pause another active evaluation rule before enabling a new one.
+
+        Current scope:
+        - evaluation rules are live-ingestion rules only
+        - they do not trigger historical backfills
+
+        Recovery guidance:
+        - `400 invalid_filter_value`: fix the filter `column` or `value` using `details.column`, `details.invalidValues`, and `details.allowedValues`
+        - `400 invalid_filter_value` with `details.column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response
+        - `400 missing_variable_mapping`: fetch the evaluator again and make sure every variable in `variables` appears exactly once in `mapping`
+        - `400 duplicate_variable_mapping`: remove repeated mappings for the same variable
+        - `400 invalid_variable_mapping`: switch to a valid `source` for the selected `target`, or fix the variable name
+        - `400 invalid_json_path`: remove or correct the `jsonPath`
+        - `422 evaluator_preflight_failed`: the selected evaluator cannot run with the resolved model configuration. Fix the evaluator/default model setup, then retry the create request.
+
+        Parameters
+        ----------
+        name : str
+            Human-readable deployment name.
+
+        evaluator : EvaluationRuleEvaluatorReference
+            Evaluator family to use.
+
+            Use `name` and `scope` from the evaluator endpoints.
+            Langfuse resolves that family to its latest version before saving the rule.
+
+        target : EvaluationRuleTarget
+            Target object type to evaluate.
+
+        enabled : bool
+            Whether the deployment should be active immediately after creation.
+
+        mapping : typing.Sequence[EvaluationRuleMapping]
+            Required variable mappings.
+
+            Every evaluator variable must appear exactly once.
+            Build this list from the evaluator `variables` array returned by the evaluator endpoints.
+
+        sampling : typing.Optional[float]
+            Optional sampling fraction. Defaults to `1`.
+
+        filter : typing.Optional[typing.Sequence[EvaluationRuleFilter]]
+            Optional filter list.
+
+            Omit or pass an empty list to evaluate all matching targets for the selected `target`.
+            Each filter object must use a column that is valid for that `target`.
+            For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        HttpResponse[EvaluationRule]
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            "api/public/unstable/evaluation-rules",
+            method="POST",
+            json={
+                "name": name,
+                "evaluator": convert_and_respect_annotation_metadata(
+                    object_=evaluator,
+                    annotation=EvaluationRuleEvaluatorReference,
+                    direction="write",
+                ),
+                "target": target,
+                "enabled": enabled,
+                "sampling": sampling,
+                "filter": convert_and_respect_annotation_metadata(
+                    object_=filter,
+                    annotation=typing.Sequence[EvaluationRuleFilter],
+                    direction="write",
+                ),
+                "mapping": convert_and_respect_annotation_metadata(
+                    object_=mapping,
+                    annotation=typing.Sequence[EvaluationRuleMapping],
+                    direction="write",
+                ),
+            },
+            request_options=request_options,
+            omit=OMIT,  # NOTE(review): presumably lets the client drop OMIT-valued fields from the payload; confirm in httpx_client
+        )
+        try:  # a JSONDecodeError from any `_response.json()` call below is converted to ApiError
+            if 200 <= _response.status_code < 300:  # any 2xx: parse the body as EvaluationRule
+                _data = typing.cast(
+                    EvaluationRule,
+                    parse_obj_as(
+                        type_=EvaluationRule,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return HttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:
+                raise BadRequestError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise unstable_errors_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise unstable_errors_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:
+                raise unstable_errors_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 409:
+                raise ConflictError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 422:
+                raise UnprocessableContentError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 400:  # NOTE(review): unreachable; 400 already raises BadRequestError above
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:  # NOTE(review): unreachable; 401 already raises the unstable UnauthorizedError above
+                raise commons_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:  # NOTE(review): unreachable; 403 already raises the unstable AccessDeniedError above
+                raise commons_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:  # NOTE(review): unreachable; 405 already raises the unstable MethodNotAllowedError above
+                raise commons_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:  # NOTE(review): unreachable; 404 already raises the unstable NotFoundError above
+                raise commons_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()  # no status branch matched; keep the parsed body for the generic ApiError
+        except JSONDecodeError:  # body was not valid JSON: report the raw text instead
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(  # fallback for any status code without a dedicated branch
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
+
+    def list(
+        self,
+        *,
+        page: typing.Optional[int] = None,
+        limit: typing.Optional[int] = None,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> HttpResponse[EvaluationRules]:
+        """
+        List evaluation rules in the authenticated project.
+
+        Each item describes one live evaluation rule and its effective runtime status.
+
+        Parameters
+        ----------
+        page : typing.Optional[int]
+            1-based page number. Defaults to `1`.
+
+        limit : typing.Optional[int]
+            Maximum number of items per page. Defaults to `50`.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        HttpResponse[EvaluationRules]
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            "api/public/unstable/evaluation-rules",
+            method="GET",
+            params={
+                "page": page,
+                "limit": limit,
+            },
+            request_options=request_options,
+        )
+        try:  # a JSONDecodeError from any `_response.json()` call below is converted to ApiError
+            if 200 <= _response.status_code < 300:  # any 2xx: parse the body as EvaluationRules
+                _data = typing.cast(
+                    EvaluationRules,
+                    parse_obj_as(
+                        type_=EvaluationRules,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return HttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:
+                raise BadRequestError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise unstable_errors_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise unstable_errors_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 400:  # NOTE(review): unreachable; 400 already raises BadRequestError above
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:  # NOTE(review): unreachable; 401 already raises the unstable UnauthorizedError above
+                raise commons_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:  # NOTE(review): unreachable; 403 already raises the unstable AccessDeniedError above
+                raise commons_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:  # NOTE(review): unreachable; 405 already raises the unstable MethodNotAllowedError above
+                raise commons_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:  # reachable: no typed 404 branch above, so 404 raises the commons NotFoundError with an untyped body
+                raise commons_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()  # no status branch matched; keep the parsed body for the generic ApiError
+        except JSONDecodeError:  # body was not valid JSON: report the raw text instead
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(  # fallback for any status code without a dedicated branch
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
+
+    def get(
+        self,
+        evaluation_rule_id: str,
+        *,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> HttpResponse[EvaluationRule]:
+        """
+        Get one evaluation rule by its identifier.
+
+        Use this endpoint to inspect the current evaluator, target, mapping, filters, and effective runtime status.
+
+        Parameters
+        ----------
+        evaluation_rule_id : str
+            Evaluation rule identifier returned by the evaluation rule endpoints.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        HttpResponse[EvaluationRule]
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            f"api/public/unstable/evaluation-rules/{jsonable_encoder(evaluation_rule_id)}",
+            method="GET",
+            request_options=request_options,
+        )
+        try:  # a JSONDecodeError from any `_response.json()` call below is converted to ApiError
+            if 200 <= _response.status_code < 300:  # any 2xx: parse the body as EvaluationRule
+                _data = typing.cast(
+                    EvaluationRule,
+                    parse_obj_as(
+                        type_=EvaluationRule,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return HttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:
+                raise BadRequestError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise unstable_errors_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise unstable_errors_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:
+                raise unstable_errors_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 400:  # NOTE(review): unreachable; 400 already raises BadRequestError above
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:  # NOTE(review): unreachable; 401 already raises the unstable UnauthorizedError above
+                raise commons_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:  # NOTE(review): unreachable; 403 already raises the unstable AccessDeniedError above
+                raise commons_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:  # NOTE(review): unreachable; 405 already raises the unstable MethodNotAllowedError above
+                raise commons_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:  # NOTE(review): unreachable; 404 already raises the unstable NotFoundError above
+                raise commons_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()  # no status branch matched; keep the parsed body for the generic ApiError
+        except JSONDecodeError:  # body was not valid JSON: report the raw text instead
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(  # fallback for any status code without a dedicated branch
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
+
+    def update(  # issues PATCH api/public/unstable/evaluation-rules/{evaluation_rule_id}
+        self,
+        evaluation_rule_id: str,
+        *,
+        name: typing.Optional[str] = OMIT,
+        evaluator: typing.Optional[EvaluationRuleEvaluatorReference] = OMIT,
+        target: typing.Optional[EvaluationRuleTarget] = OMIT,
+        enabled: typing.Optional[bool] = OMIT,
+        sampling: typing.Optional[float] = OMIT,
+        filter: typing.Optional[typing.Sequence[EvaluationRuleFilter]] = OMIT,
+        mapping: typing.Optional[typing.Sequence[EvaluationRuleMapping]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> HttpResponse[EvaluationRule]:
+        """
+        Update an evaluation rule.
+
+        Typical uses:
+        - enable or disable live execution
+        - switch to another evaluator
+        - adjust sampling
+        - change filters
+        - update variable mappings
+
+        Important behavior:
+        - provide only the fields you want to change
+        - if you provide `evaluator`, Langfuse resolves that evaluator family to its latest version before saving
+        - changing `target`, `filter`, or `mapping` must still produce a valid target-specific configuration
+        - if you change `target`, also send a compatible `filter` and `mapping` in the same request unless the existing ones are still valid for the new target
+        - if the resulting config is enabled, Langfuse re-validates that the selected evaluator can run
+        - if the update would move a non-active evaluation rule into the active state and the project already has 50 active evaluation rules, the API returns `409`
+
+        Recovery guidance:
+        - if the update fails with `missing_variable_mapping` or `invalid_variable_mapping` after changing `evaluator` or `target`, resend the request with a complete new `mapping`
+        - if the update fails with `invalid_filter_value` after changing `target`, resend the request with a target-compatible `filter`
+
+        Parameters
+        ----------
+        evaluation_rule_id : str
+            Evaluation rule identifier.
+
+        name : typing.Optional[str]
+            Updated deployment name.
+
+        evaluator : typing.Optional[EvaluationRuleEvaluatorReference]
+            Updated evaluator family.
+
+            Langfuse resolves the provided evaluator family to its latest version before saving the rule.
+
+        target : typing.Optional[EvaluationRuleTarget]
+            Updated target object type.
+
+        enabled : typing.Optional[bool]
+            Updated desired enabled state.
+
+        sampling : typing.Optional[float]
+            Updated sampling fraction.
+
+        filter : typing.Optional[typing.Sequence[EvaluationRuleFilter]]
+            Updated filter list.
+
+            For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+        mapping : typing.Optional[typing.Sequence[EvaluationRuleMapping]]
+            Updated variable mappings.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        HttpResponse[EvaluationRule]
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            f"api/public/unstable/evaluation-rules/{jsonable_encoder(evaluation_rule_id)}",  # id goes through jsonable_encoder before interpolation
+            method="PATCH",
+            json={  # request body; every field here defaults to OMIT in the signature
+                "name": name,
+                "evaluator": convert_and_respect_annotation_metadata(
+                    object_=evaluator,
+                    annotation=EvaluationRuleEvaluatorReference,
+                    direction="write",
+                ),
+                "target": target,  # target/enabled/sampling (and name) are forwarded without the conversion helper
+                "enabled": enabled,
+                "sampling": sampling,
+                "filter": convert_and_respect_annotation_metadata(
+                    object_=filter,
+                    annotation=typing.Sequence[EvaluationRuleFilter],
+                    direction="write",
+                ),
+                "mapping": convert_and_respect_annotation_metadata(
+                    object_=mapping,
+                    annotation=typing.Sequence[EvaluationRuleMapping],
+                    direction="write",
+                ),
+            },
+            request_options=request_options,
+            omit=OMIT,  # presumably tells the HTTP client to drop OMIT-valued keys from the body; confirm in the client wrapper
+        )
+        try:  # a JSONDecodeError raised by any _response.json() call below is handled by the except clause at the end
+            if 200 <= _response.status_code < 300:  # any 2xx: parse the body as EvaluationRule and return it with the raw response
+                _data = typing.cast(
+                    EvaluationRule,
+                    parse_obj_as(
+                        type_=EvaluationRule,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return HttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:  # first group of branches: each raises with the body parsed as PublicApiError
+                raise BadRequestError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise unstable_errors_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise unstable_errors_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:
+                raise unstable_errors_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 422:
+                raise UnprocessableContentError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 400:  # NOTE(review): unreachable; the earlier 400 branch never falls through
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:  # NOTE(review): unreachable; the earlier 401 branch never falls through
+                raise commons_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:  # NOTE(review): unreachable; the earlier 403 branch never falls through
+                raise commons_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:  # NOTE(review): unreachable; the earlier 405 branch never falls through
+                raise commons_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:  # NOTE(review): unreachable; the earlier 404 branch never falls through
+                raise commons_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()  # reached only for statuses with no branch above, e.g. the 409 the docstring describes
+        except JSONDecodeError:  # body was not valid JSON: surface the raw text instead
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(  # generic fallback for unmatched status codes, carrying the decoded JSON body
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
+
+    def delete(  # issues DELETE api/public/unstable/evaluation-rules/{evaluation_rule_id}
+        self,
+        evaluation_rule_id: str,
+        *,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> HttpResponse[DeleteEvaluationRuleResponse]:
+        """
+        Delete an evaluation rule.
+
+        This removes the live-ingestion rule only. It does not delete the referenced evaluator.
+
+        Parameters
+        ----------
+        evaluation_rule_id : str
+            Evaluation rule identifier.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        HttpResponse[DeleteEvaluationRuleResponse]
+        """
+        _response = self._client_wrapper.httpx_client.request(  # no body or query params are passed, only the path and request_options
+            f"api/public/unstable/evaluation-rules/{jsonable_encoder(evaluation_rule_id)}",  # id goes through jsonable_encoder before interpolation
+            method="DELETE",
+            request_options=request_options,
+        )
+        try:  # a JSONDecodeError raised by any _response.json() call below is handled by the except clause at the end
+            if 200 <= _response.status_code < 300:  # any 2xx: parse the body as DeleteEvaluationRuleResponse and return it with the raw response
+                _data = typing.cast(
+                    DeleteEvaluationRuleResponse,
+                    parse_obj_as(
+                        type_=DeleteEvaluationRuleResponse,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return HttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:  # first group of branches: each raises with the body parsed as PublicApiError
+                raise BadRequestError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise unstable_errors_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise unstable_errors_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:
+                raise unstable_errors_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 400:  # NOTE(review): unreachable; the earlier 400 branch never falls through
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:  # NOTE(review): unreachable; the earlier 401 branch never falls through
+                raise commons_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:  # NOTE(review): unreachable; the earlier 403 branch never falls through
+                raise commons_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:  # NOTE(review): unreachable; the earlier 405 branch never falls through
+                raise commons_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:  # NOTE(review): unreachable; the earlier 404 branch never falls through
+                raise commons_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()  # reached only for status codes with no dedicated branch above
+        except JSONDecodeError:  # body was not valid JSON: surface the raw text instead
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(  # generic fallback for unmatched status codes, carrying the decoded JSON body
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
+
+
+class AsyncRawEvaluationRulesClient:
+    def __init__(self, *, client_wrapper: AsyncClientWrapper):
+        self._client_wrapper = client_wrapper  # kept so methods can await self._client_wrapper.httpx_client.request(...)
+
+    async def create(  # issues POST api/public/unstable/evaluation-rules
+        self,
+        *,
+        name: str,
+        evaluator: EvaluationRuleEvaluatorReference,
+        target: EvaluationRuleTarget,
+        enabled: bool,
+        mapping: typing.Sequence[EvaluationRuleMapping],
+        sampling: typing.Optional[float] = OMIT,
+        filter: typing.Optional[typing.Sequence[EvaluationRuleFilter]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> AsyncHttpResponse[EvaluationRule]:
+        """
+        Create an evaluation rule.
+
+        An evaluation rule defines **what** incoming data should be evaluated and **how prompt variables should be populated** from that data.
+
+        Use this resource after choosing an evaluator from the evaluator endpoints.
+
+        Key rules:
+        - `name` must be unique within the project for public evaluation rules
+        - `target` must be `observation` or `experiment`
+        - `evaluator.name` + `evaluator.scope` must identify an existing evaluator family returned by the evaluator endpoints
+        - Langfuse resolves that family to its latest version before saving the evaluation rule
+        - for `target=experiment`, use dataset `id` values from `GET /api/public/v2/datasets` when filtering by `datasetId`
+        - every evaluator prompt variable must be mapped exactly once
+        - `expected_output` and `experiment_item_metadata` mappings are only valid for `target=experiment`
+        - if `enabled=true`, Langfuse validates that the referenced evaluator can currently run
+        - at most 50 evaluation rules can be effectively active in one project at the same time
+
+        If an evaluation rule with the same `name` already exists in the project, the API returns `409`.
+        In that case, update the existing resource with `PATCH /api/public/unstable/evaluation-rules/{evaluationRuleId}` instead of creating a second one.
+
+        If enabling this resource would exceed the 50-active limit, the API also returns `409`.
+        In that case, disable or pause another active evaluation rule before enabling a new one.
+
+        Current scope:
+        - evaluation rules are live-ingestion rules only
+        - they do not trigger historical backfills
+
+        Recovery guidance:
+        - `400 invalid_filter_value`: fix the filter `column` or `value` using `details.column`, `details.invalidValues`, and `details.allowedValues`
+        - `400 invalid_filter_value` with `details.column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response
+        - `400 missing_variable_mapping`: fetch the evaluator again and make sure every variable in `variables` appears exactly once in `mapping`
+        - `400 duplicate_variable_mapping`: remove repeated mappings for the same variable
+        - `400 invalid_variable_mapping`: switch to a valid `source` for the selected `target`, or fix the variable name
+        - `400 invalid_json_path`: remove or correct the `jsonPath`
+        - `422 evaluator_preflight_failed`: the selected evaluator cannot run with the resolved model configuration. Fix the evaluator/default model setup, then retry the create request.
+
+        Parameters
+        ----------
+        name : str
+            Human-readable deployment name.
+
+        evaluator : EvaluationRuleEvaluatorReference
+            Evaluator family to use.
+
+            Use `name` and `scope` from the evaluator endpoints.
+            Langfuse resolves that family to its latest version before saving the rule.
+
+        target : EvaluationRuleTarget
+            Target object type to evaluate.
+
+        enabled : bool
+            Whether the deployment should be active immediately after creation.
+
+        mapping : typing.Sequence[EvaluationRuleMapping]
+            Required variable mappings.
+
+            Every evaluator variable must appear exactly once.
+            Build this list from the evaluator `variables` array returned by the evaluator endpoints.
+
+        sampling : typing.Optional[float]
+            Optional sampling fraction. Defaults to `1`.
+
+        filter : typing.Optional[typing.Sequence[EvaluationRuleFilter]]
+            Optional filter list.
+
+            Omit or pass an empty list to evaluate all matching targets for the selected `target`.
+            Each filter object must use a column that is valid for that `target`.
+            For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        AsyncHttpResponse[EvaluationRule]
+        """
+        _response = await self._client_wrapper.httpx_client.request(  # awaited: this is the async counterpart of the request call
+            "api/public/unstable/evaluation-rules",
+            method="POST",
+            json={  # request body; only sampling and filter default to OMIT in the signature
+                "name": name,
+                "evaluator": convert_and_respect_annotation_metadata(
+                    object_=evaluator,
+                    annotation=EvaluationRuleEvaluatorReference,
+                    direction="write",
+                ),
+                "target": target,  # target/enabled/sampling (and name) are forwarded without the conversion helper
+                "enabled": enabled,
+                "sampling": sampling,
+                "filter": convert_and_respect_annotation_metadata(
+                    object_=filter,
+                    annotation=typing.Sequence[EvaluationRuleFilter],
+                    direction="write",
+                ),
+                "mapping": convert_and_respect_annotation_metadata(
+                    object_=mapping,
+                    annotation=typing.Sequence[EvaluationRuleMapping],
+                    direction="write",
+                ),
+            },
+            request_options=request_options,
+            omit=OMIT,  # presumably tells the HTTP client to drop OMIT-valued keys from the body; confirm in the client wrapper
+        )
+        try:  # a JSONDecodeError raised by any _response.json() call below is handled by the except clause at the end
+            if 200 <= _response.status_code < 300:  # any 2xx: parse the body as EvaluationRule and return it with the raw response
+                _data = typing.cast(
+                    EvaluationRule,
+                    parse_obj_as(
+                        type_=EvaluationRule,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return AsyncHttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:  # first group of branches: each raises with the body parsed as PublicApiError
+                raise BadRequestError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise unstable_errors_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise unstable_errors_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:
+                raise unstable_errors_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 409:  # the docstring describes 409 for a duplicate name or for exceeding the 50-active limit
+                raise ConflictError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 422:
+                raise UnprocessableContentError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 400:  # NOTE(review): unreachable; the earlier 400 branch never falls through
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:  # NOTE(review): unreachable; the earlier 401 branch never falls through
+                raise commons_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:  # NOTE(review): unreachable; the earlier 403 branch never falls through
+                raise commons_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:  # NOTE(review): unreachable; the earlier 405 branch never falls through
+                raise commons_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:  # NOTE(review): unreachable; the earlier 404 branch never falls through
+                raise commons_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()  # reached only for status codes with no dedicated branch above
+        except JSONDecodeError:  # body was not valid JSON: surface the raw text instead
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(  # generic fallback for unmatched status codes, carrying the decoded JSON body
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
+
+    async def list(
+        self,
+        *,
+        page: typing.Optional[int] = None,
+        limit: typing.Optional[int] = None,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> AsyncHttpResponse[EvaluationRules]:
+        """
+        List evaluation rules in the authenticated project.
+
+        Each item describes one live evaluation rule and its effective runtime status.
+
+        Parameters
+        ----------
+        page : typing.Optional[int]
+            1-based page number. Defaults to `1`.
+
+        limit : typing.Optional[int]
+            Maximum number of items per page. Defaults to `50`.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        AsyncHttpResponse[EvaluationRules]
+        """
+        _response = await self._client_wrapper.httpx_client.request(  # GET with page/limit passed as `params`
+            "api/public/unstable/evaluation-rules",
+            method="GET",
+            params={
+                "page": page,
+                "limit": limit,
+            },
+            request_options=request_options,
+        )
+        try:  # a JSONDecodeError from any _response.json() call in this block is turned into ApiError below
+            if 200 <= _response.status_code < 300:  # any 2xx: parse the JSON body as EvaluationRules
+                _data = typing.cast(
+                    EvaluationRules,
+                    parse_obj_as(
+                        type_=EvaluationRules,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return AsyncHttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:  # typed error branches start here: bodies are parsed as PublicApiError
+                raise BadRequestError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise unstable_errors_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise unstable_errors_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 400:  # never reached: the 400 branch above cannot fall through
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:  # never reached: the 401 branch above cannot fall through
+                raise commons_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:  # never reached: the 403 branch above cannot fall through
+                raise commons_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:  # never reached: the 405 branch above cannot fall through
+                raise commons_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:  # reachable: no typed 404 branch exists above, so the body is parsed with type_=typing.Any
+                raise commons_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()  # no branch matched: decode the body for the generic ApiError raised after the try
+        except JSONDecodeError:  # body is not valid JSON: report the raw response text instead
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(  # fallback for every status code without a dedicated branch
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
+
+    async def get(
+        self,
+        evaluation_rule_id: str,
+        *,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> AsyncHttpResponse[EvaluationRule]:
+        """
+        Get one evaluation rule by its identifier.
+
+        Use this endpoint to inspect the current evaluator, target, mapping, filters, and effective runtime status.
+
+        Parameters
+        ----------
+        evaluation_rule_id : str
+            Evaluation rule identifier returned by the evaluation rule endpoints.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        AsyncHttpResponse[EvaluationRule]
+        """
+        _response = await self._client_wrapper.httpx_client.request(  # GET; the id goes through jsonable_encoder before being placed in the path
+            f"api/public/unstable/evaluation-rules/{jsonable_encoder(evaluation_rule_id)}",
+            method="GET",
+            request_options=request_options,
+        )
+        try:  # a JSONDecodeError from any _response.json() call in this block is turned into ApiError below
+            if 200 <= _response.status_code < 300:  # any 2xx: parse the JSON body as EvaluationRule
+                _data = typing.cast(
+                    EvaluationRule,
+                    parse_obj_as(
+                        type_=EvaluationRule,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return AsyncHttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:  # typed error branches start here: bodies are parsed as PublicApiError
+                raise BadRequestError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise unstable_errors_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise unstable_errors_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:
+                raise unstable_errors_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 400:  # never reached: the 400 branch above cannot fall through
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:  # never reached: the 401 branch above cannot fall through
+                raise commons_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:  # never reached: the 403 branch above cannot fall through
+                raise commons_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:  # never reached: the 405 branch above cannot fall through
+                raise commons_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:  # never reached: the 404 branch above cannot fall through
+                raise commons_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()  # no branch matched: decode the body for the generic ApiError raised after the try
+        except JSONDecodeError:  # body is not valid JSON: report the raw response text instead
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(  # fallback for every status code without a dedicated branch
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
+
+    async def update(
+        self,
+        evaluation_rule_id: str,
+        *,
+        name: typing.Optional[str] = OMIT,
+        evaluator: typing.Optional[EvaluationRuleEvaluatorReference] = OMIT,
+        target: typing.Optional[EvaluationRuleTarget] = OMIT,
+        enabled: typing.Optional[bool] = OMIT,
+        sampling: typing.Optional[float] = OMIT,
+        filter: typing.Optional[typing.Sequence[EvaluationRuleFilter]] = OMIT,
+        mapping: typing.Optional[typing.Sequence[EvaluationRuleMapping]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> AsyncHttpResponse[EvaluationRule]:
+        """
+        Update an evaluation rule.
+
+        Typical uses:
+        - enable or disable live execution
+        - switch to another evaluator
+        - adjust sampling
+        - change filters
+        - update variable mappings
+
+        Important behavior:
+        - provide only the fields you want to change
+        - if you provide `evaluator`, Langfuse resolves that evaluator family to its latest version before saving
+        - changing `target`, `filter`, or `mapping` must still produce a valid target-specific configuration
+        - if you change `target`, also send a compatible `filter` and `mapping` in the same request unless the existing ones are still valid for the new target
+        - if the resulting config is enabled, Langfuse re-validates that the selected evaluator can run
+        - if the update would move a non-active evaluation rule into the active state and the project already has 50 active evaluation rules, the API returns `409`
+
+        Recovery guidance:
+        - if the update fails with `missing_variable_mapping` or `invalid_variable_mapping` after changing `evaluator` or `target`, resend the request with a complete new `mapping`
+        - if the update fails with `invalid_filter_value` after changing `target`, resend the request with a target-compatible `filter`
+
+        Parameters
+        ----------
+        evaluation_rule_id : str
+            Evaluation rule identifier.
+
+        name : typing.Optional[str]
+            Updated deployment name.
+
+        evaluator : typing.Optional[EvaluationRuleEvaluatorReference]
+            Updated evaluator family.
+
+            Langfuse resolves the provided evaluator family to its latest version before saving the rule.
+
+        target : typing.Optional[EvaluationRuleTarget]
+            Updated target object type.
+
+        enabled : typing.Optional[bool]
+            Updated desired enabled state.
+
+        sampling : typing.Optional[float]
+            Updated sampling fraction.
+
+        filter : typing.Optional[typing.Sequence[EvaluationRuleFilter]]
+            Updated filter list.
+
+            For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+
+        mapping : typing.Optional[typing.Sequence[EvaluationRuleMapping]]
+            Updated variable mappings.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        AsyncHttpResponse[EvaluationRule]
+        """
+        _response = await self._client_wrapper.httpx_client.request(  # PATCH; the id goes through jsonable_encoder before being placed in the path
+            f"api/public/unstable/evaluation-rules/{jsonable_encoder(evaluation_rule_id)}",
+            method="PATCH",
+            json={
+                "name": name,
+                "evaluator": convert_and_respect_annotation_metadata(
+                    object_=evaluator,
+                    annotation=EvaluationRuleEvaluatorReference,
+                    direction="write",
+                ),
+                "target": target,  # sent as-is, unlike evaluator/filter/mapping which go through convert_and_respect_annotation_metadata
+                "enabled": enabled,
+                "sampling": sampling,
+                "filter": convert_and_respect_annotation_metadata(
+                    object_=filter,
+                    annotation=typing.Sequence[EvaluationRuleFilter],
+                    direction="write",
+                ),
+                "mapping": convert_and_respect_annotation_metadata(
+                    object_=mapping,
+                    annotation=typing.Sequence[EvaluationRuleMapping],
+                    direction="write",
+                ),
+            },
+            request_options=request_options,
+            omit=OMIT,  # NOTE(review): presumably makes the client drop fields still set to OMIT - confirm in the http client
+        )
+        try:  # a JSONDecodeError from any _response.json() call in this block is turned into ApiError below
+            if 200 <= _response.status_code < 300:  # any 2xx: parse the JSON body as EvaluationRule
+                _data = typing.cast(
+                    EvaluationRule,
+                    parse_obj_as(
+                        type_=EvaluationRule,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return AsyncHttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:  # typed error branches start here: bodies are parsed as PublicApiError
+                raise BadRequestError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise unstable_errors_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise unstable_errors_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:
+                raise unstable_errors_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 422:
+                raise UnprocessableContentError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 400:  # never reached: the 400 branch above cannot fall through
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:  # never reached: the 401 branch above cannot fall through
+                raise commons_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:  # never reached: the 403 branch above cannot fall through
+                raise commons_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:  # never reached: the 405 branch above cannot fall through
+                raise commons_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:  # never reached: the 404 branch above cannot fall through
+                raise commons_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()  # no branch matched (the 409 named in the docstring has no typed branch): decode the body for the generic ApiError
+        except JSONDecodeError:  # body is not valid JSON: report the raw response text instead
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(  # fallback for every status code without a dedicated branch
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
+
+    async def delete(
+        self,
+        evaluation_rule_id: str,
+        *,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> AsyncHttpResponse[DeleteEvaluationRuleResponse]:
+        """
+        Delete an evaluation rule.
+
+        This removes the live-ingestion rule only. It does not delete the referenced evaluator.
+
+        Parameters
+        ----------
+        evaluation_rule_id : str
+            Evaluation rule identifier.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        AsyncHttpResponse[DeleteEvaluationRuleResponse]
+        """
+        _response = await self._client_wrapper.httpx_client.request(  # DELETE; the id goes through jsonable_encoder before being placed in the path
+            f"api/public/unstable/evaluation-rules/{jsonable_encoder(evaluation_rule_id)}",
+            method="DELETE",
+            request_options=request_options,
+        )
+        try:  # a JSONDecodeError from any _response.json() call in this block is turned into ApiError below
+            if 200 <= _response.status_code < 300:  # any 2xx: parse the JSON body as DeleteEvaluationRuleResponse
+                _data = typing.cast(
+                    DeleteEvaluationRuleResponse,
+                    parse_obj_as(
+                        type_=DeleteEvaluationRuleResponse,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return AsyncHttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:  # typed error branches start here: bodies are parsed as PublicApiError
+                raise BadRequestError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise unstable_errors_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise unstable_errors_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:
+                raise unstable_errors_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 400:  # never reached: the 400 branch above cannot fall through
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:  # never reached: the 401 branch above cannot fall through
+                raise commons_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:  # never reached: the 403 branch above cannot fall through
+                raise commons_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:  # never reached: the 405 branch above cannot fall through
+                raise commons_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:  # never reached: the 404 branch above cannot fall through
+                raise commons_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()  # no branch matched: decode the body for the generic ApiError raised after the try
+        except JSONDecodeError:  # body is not valid JSON: report the raw response text instead
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(  # fallback for every status code without a dedicated branch
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
diff --git a/langfuse/api/unstable/evaluation_rules/types/__init__.py b/langfuse/api/unstable/evaluation_rules/types/__init__.py
new file mode 100644
index 000000000..2854b1237
--- /dev/null
+++ b/langfuse/api/unstable/evaluation_rules/types/__init__.py
@@ -0,0 +1,62 @@
+# This file was auto-generated by Fern from our API Definition.
+
+# isort: skip_file
+
+import typing
+from importlib import import_module
+
+if typing.TYPE_CHECKING:
+    from .create_evaluation_rule_request import CreateEvaluationRuleRequest
+    from .delete_evaluation_rule_response import DeleteEvaluationRuleResponse
+    from .evaluation_rule import EvaluationRule
+    from .evaluation_rule_evaluator import EvaluationRuleEvaluator
+    from .evaluation_rule_evaluator_reference import EvaluationRuleEvaluatorReference
+    from .evaluation_rules import EvaluationRules
+    from .update_evaluation_rule_request import UpdateEvaluationRuleRequest
+_dynamic_imports: typing.Dict[str, str] = {  # exported name -> relative module that defines it
+    "CreateEvaluationRuleRequest": ".create_evaluation_rule_request",
+    "DeleteEvaluationRuleResponse": ".delete_evaluation_rule_response",
+    "EvaluationRule": ".evaluation_rule",
+    "EvaluationRuleEvaluator": ".evaluation_rule_evaluator",
+    "EvaluationRuleEvaluatorReference": ".evaluation_rule_evaluator_reference",
+    "EvaluationRules": ".evaluation_rules",
+    "UpdateEvaluationRuleRequest": ".update_evaluation_rule_request",
+}
+
+
+def __getattr__(attr_name: str) -> typing.Any:
+    """Resolve a lazily exported name on attribute access (PEP 562 module hook)."""
+    module_name = _dynamic_imports.get(attr_name)
+    if module_name is None:
+        message = (
+            f"No {attr_name} found in _dynamic_imports for module name -> {__name__}"
+        )
+        raise AttributeError(message)
+    try:
+        loaded = import_module(module_name, __package__)
+        # A name mapped to a same-named submodule yields the module object itself.
+        return loaded if module_name == f".{attr_name}" else getattr(loaded, attr_name)
+    except ImportError as e:
+        raise ImportError(
+            f"Failed to import {attr_name} from {module_name}: {e}"
+        ) from e
+    except AttributeError as e:
+        raise AttributeError(
+            f"Failed to get {attr_name} from {module_name}: {e}"
+        ) from e
+
+
+def __dir__():
+    # Advertise exactly the lazily importable names, in sorted order.
+    return sorted(_dynamic_imports)
+
+
+__all__ = [
+    "CreateEvaluationRuleRequest",
+    "DeleteEvaluationRuleResponse",
+    "EvaluationRule",
+    "EvaluationRuleEvaluator",
+    "EvaluationRuleEvaluatorReference",
+    "EvaluationRules",
+    "UpdateEvaluationRuleRequest",
+]
diff --git a/langfuse/api/unstable/evaluation_rules/types/create_evaluation_rule_request.py b/langfuse/api/unstable/evaluation_rules/types/create_evaluation_rule_request.py
new file mode 100644
index 000000000..9a90b227a
--- /dev/null
+++ b/langfuse/api/unstable/evaluation_rules/types/create_evaluation_rule_request.py
@@ -0,0 +1,75 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+from ...commons.types.evaluation_rule_filter import EvaluationRuleFilter
+from ...commons.types.evaluation_rule_mapping import EvaluationRuleMapping
+from ...commons.types.evaluation_rule_target import EvaluationRuleTarget
+from .evaluation_rule_evaluator_reference import EvaluationRuleEvaluatorReference
+
+
+class CreateEvaluationRuleRequest(UniversalBaseModel):
+    """
+    Request body for creating an evaluation rule; only `sampling` and `filter` are optional.
+
+    Checklist for agents and SDK clients:
+    - reference an existing evaluator family by `evaluator.name` and `evaluator.scope`
+    - choose `target=observation` or `target=experiment`
+    - if `target=experiment` and you want a dataset filter, call `GET /api/public/v2/datasets` first and use dataset `id` values in `filter[].value`
+    - fetch or inspect the evaluator first, then provide a complete variable mapping for every evaluator variable listed in `variables`
+    - optionally narrow execution with `filter` and reduce volume with `sampling`
+    - set `enabled=true` only when you want live execution immediately
+    """
+
+    name: str = pydantic.Field()  # required (no default)
+    """
+    Human-readable deployment name.
+    """
+
+    evaluator: EvaluationRuleEvaluatorReference = pydantic.Field()  # required (no default)
+    """
+    Evaluator family to use.
+    
+    Use `name` and `scope` from the evaluator endpoints.
+    Langfuse resolves that family to its latest version before saving the rule.
+    """
+
+    target: EvaluationRuleTarget = pydantic.Field()  # required (no default)
+    """
+    Target object type to evaluate.
+    """
+
+    enabled: bool = pydantic.Field()  # required (no default)
+    """
+    Whether the deployment should be active immediately after creation.
+    """
+
+    sampling: typing.Optional[float] = pydantic.Field(default=None)  # optional; client-side default is None
+    """
+    Optional sampling fraction. Defaults to `1`.
+    """
+
+    filter: typing.Optional[typing.List[EvaluationRuleFilter]] = pydantic.Field(
+        default=None
+    )  # optional; client-side default is None
+    """
+    Optional filter list.
+    
+    Omit or pass an empty list to evaluate all matching targets for the selected `target`.
+    Each filter object must use a column that is valid for that `target`.
+    For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+    """
+
+    mapping: typing.List[EvaluationRuleMapping] = pydantic.Field()  # required (no default)
+    """
+    Required variable mappings.
+    
+    Every evaluator variable must appear exactly once.
+    Build this list from the evaluator `variables` array returned by the evaluator endpoints.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )  # extra="allow" keeps unknown keys; frozen=True makes instances immutable
diff --git a/langfuse/api/unstable/evaluation_rules/types/delete_evaluation_rule_response.py b/langfuse/api/unstable/evaluation_rules/types/delete_evaluation_rule_response.py
new file mode 100644
index 000000000..42423c3dc
--- /dev/null
+++ b/langfuse/api/unstable/evaluation_rules/types/delete_evaluation_rule_response.py
@@ -0,0 +1,21 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+
+
+class DeleteEvaluationRuleResponse(UniversalBaseModel):
+    """
+    Confirmation body returned after an evaluation rule was deleted successfully.
+    """
+
+    message: str = pydantic.Field()  # required (no default)
+    """
+    Always `Evaluation rule successfully deleted`.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )  # extra="allow" keeps unknown keys; frozen=True makes instances immutable
diff --git a/langfuse/api/unstable/evaluation_rules/types/evaluation_rule.py b/langfuse/api/unstable/evaluation_rules/types/evaluation_rule.py
new file mode 100644
index 000000000..d8baee407
--- /dev/null
+++ b/langfuse/api/unstable/evaluation_rules/types/evaluation_rule.py
@@ -0,0 +1,172 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+import pydantic
+import typing_extensions
+from ....core.pydantic_utilities import UniversalBaseModel
+from ....core.serialization import FieldMetadata
+from ...commons.types.evaluation_rule_filter import EvaluationRuleFilter
+from ...commons.types.evaluation_rule_mapping import EvaluationRuleMapping
+from ...commons.types.evaluation_rule_status import EvaluationRuleStatus
+from ...commons.types.evaluation_rule_target import EvaluationRuleTarget
+from .evaluation_rule_evaluator import EvaluationRuleEvaluator
+
+
+class EvaluationRule(UniversalBaseModel):
+    """
+    Live evaluation rule for incoming data.
+
+    An evaluation rule answers:
+    - which evaluator should be used
+    - which target objects should trigger scoring
+    - how often scoring should run
+    - which target fields should populate each evaluator variable
+    - whether the rule is active, inactive, or paused
+
+    Important status semantics:
+    - `enabled` is the desired on/off setting from the client
+    - `status` is the effective runtime state after Langfuse applies validation and blocking rules
+    - `enabled=true` with `status=paused` means the rule should run, but Langfuse has paused it until the underlying problem is fixed
+
+    Examples
+    --------
+    import datetime
+
+    from langfuse.api.unstable.commons import (
+        EvaluationRuleFilter_StringOptions,
+        EvaluationRuleMapping,
+        EvaluationRuleMappingSource,
+        EvaluationRuleOptionsFilterOperator,
+        EvaluationRuleStatus,
+        EvaluationRuleTarget,
+        EvaluatorScope,
+    )
+    from langfuse.api.unstable.evaluation_rules import (
+        EvaluationRule,
+        EvaluationRuleEvaluator,
+    )
+
+    EvaluationRule(
+        id="erule_123",
+        name="answer-correctness-live",
+        evaluator=EvaluationRuleEvaluator(
+            id="evaltmpl_123",
+            name="answer-correctness",
+            scope=EvaluatorScope.PROJECT,
+        ),
+        target=EvaluationRuleTarget.OBSERVATION,
+        enabled=True,
+        status=EvaluationRuleStatus.ACTIVE,
+        sampling=1.0,
+        filter=[
+            EvaluationRuleFilter_StringOptions(
+                column="type",
+                operator=EvaluationRuleOptionsFilterOperator.ANY_OF,
+                value=["GENERATION"],
+            )
+        ],
+        mapping=[
+            EvaluationRuleMapping(
+                variable="input",
+                source=EvaluationRuleMappingSource.INPUT,
+            ),
+            EvaluationRuleMapping(
+                variable="output",
+                source=EvaluationRuleMappingSource.OUTPUT,
+            ),
+        ],
+        created_at=datetime.datetime.fromisoformat(
+            "2026-03-30 09:20:00+00:00",
+        ),
+        updated_at=datetime.datetime.fromisoformat(
+            "2026-03-30 09:20:00+00:00",
+        ),
+    )
+    """
+
+    id: str = pydantic.Field()  # required (no default)
+    """
+    Stable evaluation rule identifier.
+    """
+
+    name: str = pydantic.Field()  # required (no default)
+    """
+    Human-readable deployment name. This is independent from the evaluator name.
+    """
+
+    evaluator: EvaluationRuleEvaluator = pydantic.Field()  # required (no default)
+    """
+    Evaluator currently used by this rule.
+    
+    `name` and `scope` identify the evaluator family conceptually.
+    `id` is the currently active evaluator version in that family.
+    If you create a newer project version with the same evaluator name later, existing evaluation rules are moved to it automatically.
+    """
+
+    target: EvaluationRuleTarget = pydantic.Field()  # required (no default)
+    """
+    Target object type that should trigger scoring.
+    """
+
+    enabled: bool = pydantic.Field()  # required (no default)
+    """
+    Desired enabled state configured by the client.
+    """
+
+    status: EvaluationRuleStatus = pydantic.Field()  # required (no default)
+    """
+    Effective runtime status after Langfuse applies validation and blocking rules.
+    """
+
+    paused_reason: typing_extensions.Annotated[
+        typing.Optional[str], FieldMetadata(alias="pausedReason")
+    ] = pydantic.Field(default=None)  # optional; aliased to "pausedReason"
+    """
+    Machine-readable reason when `status=paused`, otherwise `null`.
+    """
+
+    paused_message: typing_extensions.Annotated[
+        typing.Optional[str], FieldMetadata(alias="pausedMessage")
+    ] = pydantic.Field(default=None)  # optional; aliased to "pausedMessage"
+    """
+    Human-readable explanation when `status=paused`, otherwise `null`.
+    """
+
+    sampling: float = pydantic.Field()  # required (no default)
+    """
+    Fraction of matching target objects that should be evaluated.
+    
+    Must be greater than `0` and less than or equal to `1`.
+    - `1` means evaluate every matching target.
+    - `0.25` means evaluate approximately 25% of matching targets.
+    """
+
+    filter: typing.List[EvaluationRuleFilter] = pydantic.Field()  # required (no default)
+    """
+    List of filter conditions used to decide whether a target should be evaluated.
+    """
+
+    mapping: typing.List[EvaluationRuleMapping] = pydantic.Field()  # required (no default)
+    """
+    Variable mappings used to populate the evaluator prompt from the live target object.
+    """
+
+    created_at: typing_extensions.Annotated[
+        dt.datetime, FieldMetadata(alias="createdAt")
+    ] = pydantic.Field()  # required; aliased to "createdAt"
+    """
+    Timestamp when the evaluation rule was created.
+    """
+
+    updated_at: typing_extensions.Annotated[
+        dt.datetime, FieldMetadata(alias="updatedAt")
+    ] = pydantic.Field()  # required; aliased to "updatedAt"
+    """
+    Timestamp when the evaluation rule was last updated.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )  # extra="allow" keeps unknown keys; frozen=True makes instances immutable
diff --git a/langfuse/api/unstable/evaluation_rules/types/evaluation_rule_evaluator.py b/langfuse/api/unstable/evaluation_rules/types/evaluation_rule_evaluator.py
new file mode 100644
index 000000000..9d1be79de
--- /dev/null
+++ b/langfuse/api/unstable/evaluation_rules/types/evaluation_rule_evaluator.py
@@ -0,0 +1,35 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+from ...commons.types.evaluator_scope import EvaluatorScope
+
+
+class EvaluationRuleEvaluator(UniversalBaseModel):
+    """
+    Resolved evaluator currently used by the evaluation rule.
+
+    `id` is the exact active evaluator version.
+    `name` and `scope` identify the evaluator family conceptually.
+    """
+
+    id: str = pydantic.Field()  # required (no default)
+    """
+    Identifier of the exact evaluator version currently used by the rule.
+    """
+
+    name: str = pydantic.Field()  # required (no default)
+    """
+    Evaluator family name.
+    """
+
+    scope: EvaluatorScope = pydantic.Field()  # required (no default)
+    """
+    Whether the evaluator family is project-owned or Langfuse-managed.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )  # extra="allow" keeps unknown keys; frozen=True makes instances immutable
diff --git a/langfuse/api/unstable/evaluation_rules/types/evaluation_rule_evaluator_reference.py b/langfuse/api/unstable/evaluation_rules/types/evaluation_rule_evaluator_reference.py
new file mode 100644
index 000000000..25253182f
--- /dev/null
+++ b/langfuse/api/unstable/evaluation_rules/types/evaluation_rule_evaluator_reference.py
@@ -0,0 +1,29 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+from ...commons.types.evaluator_scope import EvaluatorScope
+
+
+class EvaluationRuleEvaluatorReference(UniversalBaseModel):
+    """
+    Evaluator family reference used when creating or updating an evaluation rule.
+
+    `name` and `scope` are enough to identify the evaluator family in the authenticated project context.
+    """
+
+    name: str = pydantic.Field()  # required (no default)
+    """
+    Evaluator family name.
+    """
+
+    scope: EvaluatorScope = pydantic.Field()  # required (no default)
+    """
+    Whether the evaluator family is project-owned or Langfuse-managed.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )  # extra="allow" keeps unknown keys; frozen=True makes instances immutable
diff --git a/langfuse/api/unstable/evaluation_rules/types/evaluation_rules.py b/langfuse/api/unstable/evaluation_rules/types/evaluation_rules.py
new file mode 100644
index 000000000..cd1f74c6d
--- /dev/null
+++ b/langfuse/api/unstable/evaluation_rules/types/evaluation_rules.py
@@ -0,0 +1,28 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+from ....utils.pagination.types.meta_response import MetaResponse
+from .evaluation_rule import EvaluationRule
+
+
+class EvaluationRules(UniversalBaseModel):
+    """
+    One page of evaluation rules (`data`) together with its pagination metadata (`meta`).
+    """
+
+    data: typing.List[EvaluationRule] = pydantic.Field()  # required (no default)
+    """
+    Evaluation rules in the current page.
+    """
+
+    meta: MetaResponse = pydantic.Field()  # required (no default)
+    """
+    Standard pagination metadata.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )  # extra="allow" keeps unknown keys; frozen=True makes instances immutable
diff --git a/langfuse/api/unstable/evaluation_rules/types/update_evaluation_rule_request.py b/langfuse/api/unstable/evaluation_rules/types/update_evaluation_rule_request.py
new file mode 100644
index 000000000..51e2d9288
--- /dev/null
+++ b/langfuse/api/unstable/evaluation_rules/types/update_evaluation_rule_request.py
@@ -0,0 +1,74 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+from ...commons.types.evaluation_rule_filter import EvaluationRuleFilter
+from ...commons.types.evaluation_rule_mapping import EvaluationRuleMapping
+from ...commons.types.evaluation_rule_target import EvaluationRuleTarget
+from .evaluation_rule_evaluator_reference import EvaluationRuleEvaluatorReference
+
+
+class UpdateEvaluationRuleRequest(UniversalBaseModel):
+    """
+    Partial update body for an evaluation rule; every field is optional and defaults to `None`.
+
+    Provide only the fields you want to change.
+    An empty body is rejected.
+
+    Practical guidance:
+    - If you only want to rename the rule or change sampling, send just those fields.
+    - If you change `evaluator`, send a fresh `mapping` unless you are certain the existing mapping still matches the evaluator variables.
+    - If you change `target`, usually send both `filter` and `mapping` in the same request.
+    - If you change an experiment `datasetId` filter, call `GET /api/public/v2/datasets` and use dataset `id` values from that response.
+    """
+
+    name: typing.Optional[str] = pydantic.Field(default=None)  # optional
+    """
+    Updated deployment name.
+    """
+
+    evaluator: typing.Optional[EvaluationRuleEvaluatorReference] = pydantic.Field(
+        default=None
+    )  # optional
+    """
+    Updated evaluator family.
+    
+    Langfuse resolves the provided evaluator family to its latest version before saving the rule.
+    """
+
+    target: typing.Optional[EvaluationRuleTarget] = pydantic.Field(default=None)  # optional
+    """
+    Updated target object type.
+    """
+
+    enabled: typing.Optional[bool] = pydantic.Field(default=None)  # optional
+    """
+    Updated desired enabled state.
+    """
+
+    sampling: typing.Optional[float] = pydantic.Field(default=None)  # optional
+    """
+    Updated sampling fraction.
+    """
+
+    filter: typing.Optional[typing.List[EvaluationRuleFilter]] = pydantic.Field(
+        default=None
+    )  # optional
+    """
+    Updated filter list.
+    
+    For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+    """
+
+    mapping: typing.Optional[typing.List[EvaluationRuleMapping]] = pydantic.Field(
+        default=None
+    )  # optional
+    """
+    Updated variable mappings.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True
+    )  # extra="allow" keeps unknown keys; frozen=True makes instances immutable
diff --git a/langfuse/api/unstable/evaluators/__init__.py b/langfuse/api/unstable/evaluators/__init__.py
new file mode 100644
index 000000000..942109740
--- /dev/null
+++ b/langfuse/api/unstable/evaluators/__init__.py
@@ -0,0 +1,44 @@
+# This file was auto-generated by Fern from our API Definition.
+
+# isort: skip_file
+
+import typing
+from importlib import import_module
+
+if typing.TYPE_CHECKING:
+    from .types import CreateEvaluatorRequest, Evaluator, Evaluators
+_dynamic_imports: typing.Dict[str, str] = {  # exported name -> relative module that defines it
+    "CreateEvaluatorRequest": ".types",
+    "Evaluator": ".types",
+    "Evaluators": ".types",
+}
+
+
+def __getattr__(attr_name: str) -> typing.Any:
+    """Resolve a lazily exported name on attribute access (PEP 562 module hook)."""
+    module_name = _dynamic_imports.get(attr_name)
+    if module_name is None:
+        message = (
+            f"No {attr_name} found in _dynamic_imports for module name -> {__name__}"
+        )
+        raise AttributeError(message)
+    try:
+        loaded = import_module(module_name, __package__)
+        # A name mapped to a same-named submodule yields the module object itself.
+        return loaded if module_name == f".{attr_name}" else getattr(loaded, attr_name)
+    except ImportError as e:
+        raise ImportError(
+            f"Failed to import {attr_name} from {module_name}: {e}"
+        ) from e
+    except AttributeError as e:
+        raise AttributeError(
+            f"Failed to get {attr_name} from {module_name}: {e}"
+        ) from e
+
+
+def __dir__():
+    # Advertise exactly the lazily importable names, in sorted order.
+    return sorted(_dynamic_imports)
+
+
+__all__ = ["CreateEvaluatorRequest", "Evaluator", "Evaluators"]
diff --git a/langfuse/api/unstable/evaluators/client.py b/langfuse/api/unstable/evaluators/client.py
new file mode 100644
index 000000000..b7f25532a
--- /dev/null
+++ b/langfuse/api/unstable/evaluators/client.py
@@ -0,0 +1,458 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
+from ...core.request_options import RequestOptions
+from ..commons.types.evaluator_model_config import EvaluatorModelConfig
+from ..commons.types.evaluator_output_definition import EvaluatorOutputDefinition
+from .raw_client import AsyncRawEvaluatorsClient, RawEvaluatorsClient
+from .types.evaluator import Evaluator
+from .types.evaluators import Evaluators
+
+# this is used as the default value for optional parameters
+OMIT = typing.cast(typing.Any, ...)
+
+
+class EvaluatorsClient:
+    def __init__(self, *, client_wrapper: SyncClientWrapper):
+        self._raw_client = RawEvaluatorsClient(client_wrapper=client_wrapper)  # all calls delegate here
+
+    @property
+    def with_raw_response(self) -> RawEvaluatorsClient:
+        """
+        Access the underlying raw client, whose methods return raw responses instead of only `.data`.
+
+        Returns
+        -------
+        RawEvaluatorsClient
+        """
+        return self._raw_client
+
+    def create(
+        self,
+        *,
+        name: str,
+        prompt: str,
+        output_definition: EvaluatorOutputDefinition,
+        model_config: typing.Optional[EvaluatorModelConfig] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> Evaluator:
+        """
+        Create an evaluator in the authenticated project.
+
+        An evaluator defines **how** Langfuse should score data: the prompt, the expected structured output, and the optional model configuration.
+
+        Naming behavior:
+        - If this is a new evaluator name in your project, Langfuse creates version `1`.
+        - If the name already exists in your project, Langfuse creates the next version and returns it.
+        - When a new project version is created, existing evaluation rules in that project automatically move to the newest version for that evaluator name.
+
+        Recommended workflow:
+        1. Create the evaluator.
+        2. Read the returned `variables` array.
+        3. Read the returned `outputDefinition.dataType` so the client knows whether future scores will be numeric, boolean, or categorical.
+        4. Create one or more evaluation rules that reference the returned evaluator family using `name` and `scope`.
+
+        Recovery guidance:
+        - `422` with `code=evaluator_preflight_failed`: the evaluator cannot run with the resolved model configuration. Add a valid explicit `modelConfig`, or configure the project's default evaluation model, then retry the same request.
+        - `400` with `code=invalid_body`: the request shape is malformed. Use the structured `details.issues` array to fix the specific fields and retry.
+        - `400` with `code=invalid_body` on `outputDefinition`: send `dataType`, `reasoning.description`, and `score.description`. Do not send `version`; it is not part of the public request shape.
+
+        Unstable API note:
+        - This surface may evolve while the underlying evaluation data model is being redesigned.
+
+        Parameters
+        ----------
+        name : str
+            Evaluator name within the authenticated project.
+
+        prompt : str
+            Prompt template used by the evaluator.
+
+        output_definition : EvaluatorOutputDefinition
+            Structured output schema the evaluator must return.
+
+            Always send `dataType`.
+            Do not send `version`; it is an internal storage detail and not part of the public request contract.
+
+        model_config : typing.Optional[EvaluatorModelConfig]
+            Optional explicit model configuration. Omit or pass `None` to use the project default evaluation model.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        Evaluator
+
+        Examples
+        --------
+        from langfuse import LangfuseAPI
+        from langfuse.api.unstable.commons import (
+            EvaluatorModelConfig,
+            EvaluatorOutputDataType,
+            EvaluatorOutputDefinition_Numeric,
+            EvaluatorOutputFieldDefinition,
+        )
+
+        client = LangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+        client.unstable.evaluators.create(
+            name="answer-correctness",
+            prompt="You are grading an answer.\n\nInput:\n{{input}}\n\nOutput:\n{{output}}\n\nReturn a score between 0 and 1.\n",
+            output_definition=EvaluatorOutputDefinition_Numeric(
+                data_type=EvaluatorOutputDataType.NUMERIC,
+                reasoning=EvaluatorOutputFieldDefinition(
+                    description="Explain why the score was assigned.",
+                ),
+                score=EvaluatorOutputFieldDefinition(
+                    description="Correctness score between 0 and 1.",
+                ),
+            ),
+            model_config=EvaluatorModelConfig(
+                provider="openai",
+                model="gpt-4.1-mini",
+            ),
+        )
+        """
+        # Delegate to the raw client and hand back only its `data` attribute.
+        return self._raw_client.create(
+            name=name,
+            prompt=prompt,
+            output_definition=output_definition,
+            model_config=model_config,
+            request_options=request_options,
+        ).data
+
+    def list(
+        self,
+        *,
+        page: typing.Optional[int] = None,
+        limit: typing.Optional[int] = None,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> Evaluators:
+        """
+        List the evaluators available to the authenticated project.
+
+        Important behavior:
+        - This endpoint returns the latest version of each available evaluator.
+        - Results can include evaluators from your project and Langfuse-managed evaluators.
+        - If the same evaluator name exists in both places, both are returned as separate items with different `scope` values.
+
+        Parameters
+        ----------
+        page : typing.Optional[int]
+            1-based page number. Defaults to `1`.
+
+        limit : typing.Optional[int]
+            Maximum number of items per page. Defaults to `50`.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        Evaluators
+
+        Examples
+        --------
+        from langfuse import LangfuseAPI
+
+        client = LangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+        client.unstable.evaluators.list()
+        """
+        # Delegate to the raw client and hand back only its `data` attribute.
+        return self._raw_client.list(
+            page=page, limit=limit, request_options=request_options
+        ).data
+
+    def get(
+        self,
+        evaluator_id: str,
+        *,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> Evaluator:
+        """
+        Get one evaluator by `id`.
+
+        Use this endpoint to read the prompt, output definition, model configuration, and derived variables of the evaluator you plan to use in an evaluation rule.
+
+        Parameters
+        ----------
+        evaluator_id : str
+            Evaluator identifier returned by the evaluator endpoints.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        Evaluator
+
+        Examples
+        --------
+        from langfuse import LangfuseAPI
+
+        client = LangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+        client.unstable.evaluators.get(
+            evaluator_id="evaluatorId",
+        )
+        """
+        raw_result = self._raw_client.get(evaluator_id, request_options=request_options)
+        return raw_result.data
+
+
+class AsyncEvaluatorsClient:
+    def __init__(self, *, client_wrapper: AsyncClientWrapper):
+        self._raw_client = AsyncRawEvaluatorsClient(client_wrapper=client_wrapper)
+
+    @property
+    def with_raw_response(self) -> AsyncRawEvaluatorsClient:
+        """
+        Retrieves a raw implementation of this client that returns raw responses.
+
+        Returns
+        -------
+        AsyncRawEvaluatorsClient
+        """
+        return self._raw_client
+
+    async def create(
+        self,
+        *,
+        name: str,
+        prompt: str,
+        output_definition: EvaluatorOutputDefinition,
+        model_config: typing.Optional[EvaluatorModelConfig] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> Evaluator:
+        """
+        Create an evaluator in the authenticated project.
+
+        Use evaluators to define **how** Langfuse should score data: the prompt, the expected structured output, and the optional model configuration.
+
+        Naming behavior:
+        - If this is a new evaluator name in your project, Langfuse creates version `1`.
+        - If the name already exists in your project, Langfuse creates the next version and returns it.
+        - When a new project version is created, existing evaluation rules in that project automatically move to the newest version for that evaluator name.
+
+        Recommended workflow:
+        1. Create the evaluator.
+        2. Read the returned `variables` array.
+        3. Read the returned `outputDefinition.dataType` so the client knows whether future scores will be numeric, boolean, or categorical.
+        4. Create one or more evaluation rules that reference the returned evaluator family using `name` and `scope`.
+
+        Recovery guidance:
+        - `422` with `code=evaluator_preflight_failed`: the evaluator cannot run with the resolved model configuration. Add a valid explicit `modelConfig`, or configure the project's default evaluation model, then retry the same request.
+        - `400` with `code=invalid_body`: the request shape is malformed. Use the structured `details.issues` array to fix the specific fields and retry.
+        - `400` with `code=invalid_body` on `outputDefinition`: send `dataType`, `reasoning.description`, and `score.description`. Do not send `version`; it is not part of the public request shape.
+
+        Unstable API note:
+        - This surface may evolve while the underlying evaluation data model is being redesigned.
+
+        Parameters
+        ----------
+        name : str
+            Evaluator name within the authenticated project.
+
+        prompt : str
+            Prompt template used by the evaluator.
+
+        output_definition : EvaluatorOutputDefinition
+            Structured output schema the evaluator must return.
+
+            Always send `dataType`.
+            Do not send `version`; it is an internal storage detail and not part of the public request contract.
+
+        model_config : typing.Optional[EvaluatorModelConfig]
+            Optional explicit model configuration. Omit or set to `null` to use the project default evaluation model.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        Evaluator
+
+        Examples
+        --------
+        import asyncio
+
+        from langfuse import AsyncLangfuseAPI
+        from langfuse.unstable.commons import (
+            EvaluatorModelConfig,
+            EvaluatorOutputDataType,
+            EvaluatorOutputDefinition_Numeric,
+            EvaluatorOutputFieldDefinition,
+        )
+
+        client = AsyncLangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+
+
+        async def main() -> None:
+            await client.unstable.evaluators.create(
+                name="answer-correctness",
+                prompt="You are grading an answer.\n\nInput:\n{{input}}\n\nOutput:\n{{output}}\n\nReturn a score between 0 and 1.\n",
+                output_definition=EvaluatorOutputDefinition_Numeric(
+                    data_type=EvaluatorOutputDataType.NUMERIC,
+                    reasoning=EvaluatorOutputFieldDefinition(
+                        description="Explain why the score was assigned.",
+                    ),
+                    score=EvaluatorOutputFieldDefinition(
+                        description="Correctness score between 0 and 1.",
+                    ),
+                ),
+                model_config=EvaluatorModelConfig(
+                    provider="openai",
+                    model="gpt-4.1-mini",
+                ),
+            )
+
+
+        asyncio.run(main())
+        """
+        _response = await self._raw_client.create(
+            name=name,
+            prompt=prompt,
+            output_definition=output_definition,
+            model_config=model_config,
+            request_options=request_options,
+        )
+        return _response.data
+
+    async def list(
+        self,
+        *,
+        page: typing.Optional[int] = None,
+        limit: typing.Optional[int] = None,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> Evaluators:
+        """
+        List the evaluators available to the authenticated project.
+
+        Important behavior:
+        - This endpoint returns the latest version of each available evaluator.
+        - Results can include evaluators from your project and Langfuse-managed evaluators.
+        - If the same evaluator name exists in both places, both are returned as separate items with different `scope` values.
+
+        Parameters
+        ----------
+        page : typing.Optional[int]
+            1-based page number. Defaults to `1`.
+
+        limit : typing.Optional[int]
+            Maximum number of items per page. Defaults to `50`.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        Evaluators
+
+        Examples
+        --------
+        import asyncio
+
+        from langfuse import AsyncLangfuseAPI
+
+        client = AsyncLangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+
+
+        async def main() -> None:
+            await client.unstable.evaluators.list()
+
+
+        asyncio.run(main())
+        """
+        _response = await self._raw_client.list(
+            page=page, limit=limit, request_options=request_options
+        )
+        return _response.data
+
+    async def get(
+        self,
+        evaluator_id: str,
+        *,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> Evaluator:
+        """
+        Get one evaluator by `id`.
+
+        Use this endpoint when you want the prompt, output definition, model configuration, and derived variables for the evaluator you plan to use in an evaluation rule.
+
+        Parameters
+        ----------
+        evaluator_id : str
+            Evaluator identifier returned by the evaluator endpoints.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        Evaluator
+
+        Examples
+        --------
+        import asyncio
+
+        from langfuse import AsyncLangfuseAPI
+
+        client = AsyncLangfuseAPI(
+            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
+            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
+            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+            base_url="https://yourhost.com/path/to/api",
+        )
+
+
+        async def main() -> None:
+            await client.unstable.evaluators.get(
+                evaluator_id="evaluatorId",
+            )
+
+
+        asyncio.run(main())
+        """
+        _response = await self._raw_client.get(
+            evaluator_id, request_options=request_options
+        )
+        return _response.data
diff --git a/langfuse/api/unstable/evaluators/raw_client.py b/langfuse/api/unstable/evaluators/raw_client.py
new file mode 100644
index 000000000..f599e3298
--- /dev/null
+++ b/langfuse/api/unstable/evaluators/raw_client.py
@@ -0,0 +1,1278 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+from json.decoder import JSONDecodeError
+
+from ...commons.errors.access_denied_error import (
+    AccessDeniedError as commons_errors_access_denied_error_AccessDeniedError,
+)
+from ...commons.errors.error import Error
+from ...commons.errors.method_not_allowed_error import (
+    MethodNotAllowedError as commons_errors_method_not_allowed_error_MethodNotAllowedError,
+)
+from ...commons.errors.not_found_error import (
+    NotFoundError as commons_errors_not_found_error_NotFoundError,
+)
+from ...commons.errors.unauthorized_error import (
+    UnauthorizedError as commons_errors_unauthorized_error_UnauthorizedError,
+)
+from ...core.api_error import ApiError
+from ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
+from ...core.http_response import AsyncHttpResponse, HttpResponse
+from ...core.jsonable_encoder import jsonable_encoder
+from ...core.pydantic_utilities import parse_obj_as
+from ...core.request_options import RequestOptions
+from ...core.serialization import convert_and_respect_annotation_metadata
+from ..commons.types.evaluator_model_config import EvaluatorModelConfig
+from ..commons.types.evaluator_output_definition import EvaluatorOutputDefinition
+from ..errors.errors.access_denied_error import (
+    AccessDeniedError as unstable_errors_errors_access_denied_error_AccessDeniedError,
+)
+from ..errors.errors.bad_request_error import BadRequestError
+from ..errors.errors.conflict_error import ConflictError
+from ..errors.errors.internal_server_error import InternalServerError
+from ..errors.errors.method_not_allowed_error import (
+    MethodNotAllowedError as unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError,
+)
+from ..errors.errors.not_found_error import (
+    NotFoundError as unstable_errors_errors_not_found_error_NotFoundError,
+)
+from ..errors.errors.too_many_requests_error import TooManyRequestsError
+from ..errors.errors.unauthorized_error import (
+    UnauthorizedError as unstable_errors_errors_unauthorized_error_UnauthorizedError,
+)
+from ..errors.errors.unprocessable_content_error import UnprocessableContentError
+from ..errors.types.public_api_error import PublicApiError
+from .types.evaluator import Evaluator
+from .types.evaluators import Evaluators
+
+# Sentinel (`Ellipsis` typed as `Any`): default for optional body parameters, also passed as `omit=` to the HTTP client.
+OMIT = typing.cast(typing.Any, ...)
+
+
+class RawEvaluatorsClient:
+    def __init__(self, *, client_wrapper: SyncClientWrapper):
+        self._client_wrapper = client_wrapper  # its `httpx_client` is what the methods below call to send requests
+
+    def create(
+        self,
+        *,
+        name: str,
+        prompt: str,
+        output_definition: EvaluatorOutputDefinition,
+        model_config: typing.Optional[EvaluatorModelConfig] = OMIT,  # default is the OMIT sentinel rather than None
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> HttpResponse[Evaluator]:
+        """
+        Create an evaluator in the authenticated project.
+
+        Use evaluators to define **how** Langfuse should score data: the prompt, the expected structured output, and the optional model configuration.
+
+        Naming behavior:
+        - If this is a new evaluator name in your project, Langfuse creates version `1`.
+        - If the name already exists in your project, Langfuse creates the next version and returns it.
+        - When a new project version is created, existing evaluation rules in that project automatically move to the newest version for that evaluator name.
+
+        Recommended workflow:
+        1. Create the evaluator.
+        2. Read the returned `variables` array.
+        3. Read the returned `outputDefinition.dataType` so the client knows whether future scores will be numeric, boolean, or categorical.
+        4. Create one or more evaluation rules that reference the returned evaluator family using `name` and `scope`.
+
+        Recovery guidance:
+        - `422` with `code=evaluator_preflight_failed`: the evaluator cannot run with the resolved model configuration. Add a valid explicit `modelConfig`, or configure the project's default evaluation model, then retry the same request.
+        - `400` with `code=invalid_body`: the request shape is malformed. Use the structured `details.issues` array to fix the specific fields and retry.
+        - `400` with `code=invalid_body` on `outputDefinition`: send `dataType`, `reasoning.description`, and `score.description`. Do not send `version`; it is not part of the public request shape.
+
+        Unstable API note:
+        - This surface may evolve while the underlying evaluation data model is being redesigned.
+
+        Parameters
+        ----------
+        name : str
+            Evaluator name within the authenticated project.
+
+        prompt : str
+            Prompt template used by the evaluator.
+
+        output_definition : EvaluatorOutputDefinition
+            Structured output schema the evaluator must return.
+
+            Always send `dataType`.
+            Do not send `version`; it is an internal storage detail and not part of the public request contract.
+
+        model_config : typing.Optional[EvaluatorModelConfig]
+            Optional explicit model configuration. Omit or set to `null` to use the project default evaluation model.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        HttpResponse[Evaluator]
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            "api/public/unstable/evaluators",
+            method="POST",
+            json={  # request body uses the API's camelCase keys
+                "name": name,
+                "prompt": prompt,
+                "outputDefinition": convert_and_respect_annotation_metadata(
+                    object_=output_definition,
+                    annotation=EvaluatorOutputDefinition,
+                    direction="write",
+                ),
+                "modelConfig": convert_and_respect_annotation_metadata(
+                    object_=model_config,
+                    annotation=typing.Optional[EvaluatorModelConfig],
+                    direction="write",
+                ),
+            },
+            request_options=request_options,
+            omit=OMIT,  # names the sentinel that marks omitted fields - TODO confirm how the HTTP client applies it
+        )
+        try:  # every _response.json() call below is covered by the JSONDecodeError handler at the end
+            if 200 <= _response.status_code < 300:  # any 2xx: parse the body into an Evaluator
+                _data = typing.cast(
+                    Evaluator,
+                    parse_obj_as(
+                        type_=Evaluator,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return HttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:  # 400/401/403/405/409/422/429/500 below: typed errors with a PublicApiError body
+                raise BadRequestError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise unstable_errors_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise unstable_errors_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 409:
+                raise ConflictError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 422:
+                raise UnprocessableContentError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 400:  # never taken: a 400 response already raised BadRequestError above
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:  # never taken: 401 is already raised above
+                raise commons_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:  # never taken: 403 is already raised above
+                raise commons_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:  # never taken: 405 is already raised above
+                raise commons_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:  # reachable: no earlier 404 branch in this method; body is left untyped
+                raise commons_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()  # status not mapped above: keep the decoded body for the generic ApiError
+        except JSONDecodeError:  # body was not valid JSON: report the raw text instead
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(  # fallback for any status code without a dedicated branch
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
+
+    def list(
+        self,
+        *,
+        page: typing.Optional[int] = None,
+        limit: typing.Optional[int] = None,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> HttpResponse[Evaluators]:
+        """
+        List the evaluators available to the authenticated project.
+
+        Important behavior:
+        - This endpoint returns the latest version of each available evaluator.
+        - Results can include evaluators from your project and Langfuse-managed evaluators.
+        - If the same evaluator name exists in both places, both are returned as separate items with different `scope` values.
+
+        Parameters
+        ----------
+        page : typing.Optional[int]
+            1-based page number. Defaults to `1`.
+
+        limit : typing.Optional[int]
+            Maximum number of items per page. Defaults to `50`.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        HttpResponse[Evaluators]
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            "api/public/unstable/evaluators",
+            method="GET",
+            params={  # both values are forwarded as given, including None when the caller supplied nothing
+                "page": page,
+                "limit": limit,
+            },
+            request_options=request_options,
+        )
+        try:  # every _response.json() call below is covered by the JSONDecodeError handler at the end
+            if 200 <= _response.status_code < 300:  # any 2xx: parse the body into Evaluators
+                _data = typing.cast(
+                    Evaluators,
+                    parse_obj_as(
+                        type_=Evaluators,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return HttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:  # 400/401/403/405/429/500 below: typed errors with a PublicApiError body
+                raise BadRequestError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise unstable_errors_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise unstable_errors_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 400:  # never taken: a 400 response already raised BadRequestError above
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:  # never taken: 401 is already raised above
+                raise commons_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:  # never taken: 403 is already raised above
+                raise commons_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:  # never taken: 405 is already raised above
+                raise commons_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:  # reachable: no earlier 404 branch in this method; body is left untyped
+                raise commons_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()  # status not mapped above: keep the decoded body for the generic ApiError
+        except JSONDecodeError:  # body was not valid JSON: report the raw text instead
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(  # fallback for any status code without a dedicated branch
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
+
+    def get(
+        self,
+        evaluator_id: str,
+        *,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> HttpResponse[Evaluator]:
+        """
+        Get one evaluator by `id`.
+
+        Use this endpoint when you want the prompt, output definition, model configuration, and derived variables for the evaluator you plan to use in an evaluation rule.
+
+        Parameters
+        ----------
+        evaluator_id : str
+            Evaluator identifier returned by the evaluator endpoints.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        HttpResponse[Evaluator]
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            f"api/public/unstable/evaluators/{jsonable_encoder(evaluator_id)}",
+            method="GET",
+            request_options=request_options,
+        )
+        try:
+            if 200 <= _response.status_code < 300:
+                _data = typing.cast(
+                    Evaluator,
+                    parse_obj_as(
+                        type_=Evaluator,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return HttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:
+                raise BadRequestError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise unstable_errors_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise unstable_errors_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:
+                raise unstable_errors_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 400:
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise commons_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise commons_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise commons_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:
+                raise commons_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
+
+
+class AsyncRawEvaluatorsClient:  # async client; every method returns an AsyncHttpResponse around the parsed body
+    def __init__(self, *, client_wrapper: AsyncClientWrapper):
+        self._client_wrapper = client_wrapper  # provides the httpx_client used for every request below
+
+    async def create(
+        self,
+        *,
+        name: str,
+        prompt: str,
+        output_definition: EvaluatorOutputDefinition,
+        model_config: typing.Optional[EvaluatorModelConfig] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> AsyncHttpResponse[Evaluator]:
+        """
+        Create an evaluator in the authenticated project.
+
+        Use evaluators to define **how** Langfuse should score data: the prompt, the expected structured output, and the optional model configuration.
+
+        Naming behavior:
+        - If this is a new evaluator name in your project, Langfuse creates version `1`.
+        - If the name already exists in your project, Langfuse creates the next version and returns it.
+        - When a new project version is created, existing evaluation rules in that project automatically move to the newest version for that evaluator name.
+
+        Recommended workflow:
+        1. Create the evaluator.
+        2. Read the returned `variables` array.
+        3. Read the returned `outputDefinition.dataType` so the client knows whether future scores will be numeric, boolean, or categorical.
+        4. Create one or more evaluation rules that reference the returned evaluator family using `name` and `scope`.
+
+        Recovery guidance:
+        - `422` with `code=evaluator_preflight_failed`: the evaluator cannot run with the resolved model configuration. Add a valid explicit `modelConfig`, or configure the project's default evaluation model, then retry the same request.
+        - `400` with `code=invalid_body`: the request shape is malformed. Use the structured `details.issues` array to fix the specific fields and retry.
+        - `400` with `code=invalid_body` on `outputDefinition`: send `dataType`, `reasoning.description`, and `score.description`. Do not send `version`; it is not part of the public request shape.
+
+        Unstable API note:
+        - This surface may evolve while the underlying evaluation data model is being redesigned.
+
+        Parameters
+        ----------
+        name : str
+            Evaluator name within the authenticated project.
+
+        prompt : str
+            Prompt template used by the evaluator.
+
+        output_definition : EvaluatorOutputDefinition
+            Structured output schema the evaluator must return.
+
+            Always send `dataType`.
+            Do not send `version`; it is an internal storage detail and not part of the public request contract.
+
+        model_config : typing.Optional[EvaluatorModelConfig]
+            Optional explicit model configuration. Omit or set to `null` to use the project default evaluation model.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        AsyncHttpResponse[Evaluator]
+        """
+        _response = await self._client_wrapper.httpx_client.request(
+            "api/public/unstable/evaluators",
+            method="POST",
+            json={
+                "name": name,
+                "prompt": prompt,
+                "outputDefinition": convert_and_respect_annotation_metadata(
+                    object_=output_definition,
+                    annotation=EvaluatorOutputDefinition,
+                    direction="write",
+                ),
+                "modelConfig": convert_and_respect_annotation_metadata(
+                    object_=model_config,
+                    annotation=typing.Optional[EvaluatorModelConfig],
+                    direction="write",
+                ),
+            },
+            request_options=request_options,
+            omit=OMIT,  # NOTE(review): presumably drops values still equal to OMIT - confirm in the core HTTP client
+        )
+        try:
+            if 200 <= _response.status_code < 300:  # success: parse the body as Evaluator
+                _data = typing.cast(
+                    Evaluator,
+                    parse_obj_as(
+                        type_=Evaluator,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return AsyncHttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:  # first group: bodies parsed as PublicApiError
+                raise BadRequestError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise unstable_errors_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise unstable_errors_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 409:
+                raise ConflictError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 422:
+                raise UnprocessableContentError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 400:  # unreachable: 400 already raised above
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:  # unreachable: 401 already raised above
+                raise commons_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:  # unreachable: 403 already raised above
+                raise commons_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:  # unreachable: 405 already raised above
+                raise commons_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:  # reachable: no earlier 404 check here; body stays untyped
+                raise commons_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()  # any other status: decoded body goes into the generic ApiError below
+        except JSONDecodeError:  # body was not valid JSON: report the raw text instead
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
+
+    async def list(
+        self,
+        *,
+        page: typing.Optional[int] = None,
+        limit: typing.Optional[int] = None,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> AsyncHttpResponse[Evaluators]:
+        """
+        List the evaluators available to the authenticated project.
+
+        Important behavior:
+        - This endpoint returns the latest version of each available evaluator.
+        - Results can include evaluators from your project and Langfuse-managed evaluators.
+        - If the same evaluator name exists in both places, both are returned as separate items with different `scope` values.
+
+        Parameters
+        ----------
+        page : typing.Optional[int]
+            1-based page number. Defaults to `1`.
+
+        limit : typing.Optional[int]
+            Maximum number of items per page. Defaults to `50`.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        AsyncHttpResponse[Evaluators]
+        """
+        _response = await self._client_wrapper.httpx_client.request(
+            "api/public/unstable/evaluators",
+            method="GET",
+            params={
+                "page": page,
+                "limit": limit,
+            },
+            request_options=request_options,
+        )
+        try:
+            if 200 <= _response.status_code < 300:  # success: parse the body as Evaluators
+                _data = typing.cast(
+                    Evaluators,
+                    parse_obj_as(
+                        type_=Evaluators,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return AsyncHttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:  # first group: bodies parsed as PublicApiError
+                raise BadRequestError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise unstable_errors_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise unstable_errors_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 400:  # unreachable: 400 already raised above
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:  # unreachable: 401 already raised above
+                raise commons_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:  # unreachable: 403 already raised above
+                raise commons_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:  # unreachable: 405 already raised above
+                raise commons_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:  # reachable: no earlier 404 check here; body stays untyped
+                raise commons_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()  # any other status: decoded body goes into the generic ApiError below
+        except JSONDecodeError:  # body was not valid JSON: report the raw text instead
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
+
+    async def get(
+        self,
+        evaluator_id: str,
+        *,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> AsyncHttpResponse[Evaluator]:
+        """
+        Get one evaluator by `id`.
+
+        Use this endpoint when you want the prompt, output definition, model configuration, and derived variables for the evaluator you plan to use in an evaluation rule.
+
+        Parameters
+        ----------
+        evaluator_id : str
+            Evaluator identifier returned by the evaluator endpoints.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        AsyncHttpResponse[Evaluator]
+        """
+        _response = await self._client_wrapper.httpx_client.request(
+            f"api/public/unstable/evaluators/{jsonable_encoder(evaluator_id)}",
+            method="GET",
+            request_options=request_options,
+        )
+        try:
+            if 200 <= _response.status_code < 300:  # success: parse the body as Evaluator
+                _data = typing.cast(
+                    Evaluator,
+                    parse_obj_as(
+                        type_=Evaluator,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return AsyncHttpResponse(response=_response, data=_data)
+            if _response.status_code == 400:  # first group: bodies parsed as PublicApiError
+                raise BadRequestError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:
+                raise unstable_errors_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:
+                raise unstable_errors_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:
+                raise unstable_errors_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:
+                raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        PublicApiError,
+                        parse_obj_as(
+                            type_=PublicApiError,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 400:  # unreachable: 400 already raised above
+                raise Error(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 401:  # unreachable: 401 already raised above
+                raise commons_errors_unauthorized_error_UnauthorizedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 403:  # unreachable: 403 already raised above
+                raise commons_errors_access_denied_error_AccessDeniedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 405:  # unreachable: 405 already raised above
+                raise commons_errors_method_not_allowed_error_MethodNotAllowedError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            if _response.status_code == 404:  # unreachable: 404 already raised above
+                raise commons_errors_not_found_error_NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Any,
+                        parse_obj_as(
+                            type_=typing.Any,  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
+            _response_json = _response.json()  # any other status: decoded body goes into the generic ApiError below
+        except JSONDecodeError:  # body was not valid JSON: report the raw text instead
+            raise ApiError(
+                status_code=_response.status_code,
+                headers=dict(_response.headers),
+                body=_response.text,
+            )
+        raise ApiError(
+            status_code=_response.status_code,
+            headers=dict(_response.headers),
+            body=_response_json,
+        )
diff --git a/langfuse/api/unstable/evaluators/types/__init__.py b/langfuse/api/unstable/evaluators/types/__init__.py
new file mode 100644
index 000000000..6e7a13233
--- /dev/null
+++ b/langfuse/api/unstable/evaluators/types/__init__.py
@@ -0,0 +1,46 @@
+# This file was auto-generated by Fern from our API Definition.
+
+# isort: skip_file
+
+import typing
+from importlib import import_module
+
+if typing.TYPE_CHECKING:
+    from .create_evaluator_request import CreateEvaluatorRequest
+    from .evaluator import Evaluator
+    from .evaluators import Evaluators
+_dynamic_imports: typing.Dict[str, str] = {  # exported name -> relative module path, read by __getattr__ below
+    "CreateEvaluatorRequest": ".create_evaluator_request",
+    "Evaluator": ".evaluator",
+    "Evaluators": ".evaluators",
+}
+
+
+def __getattr__(attr_name: str) -> typing.Any:
+    module_name = _dynamic_imports.get(attr_name)
+    if module_name is None:
+        raise AttributeError(
+            f"No {attr_name} found in _dynamic_imports for module name -> {__name__}"
+        )
+    try:
+        module = import_module(module_name, __package__)
+        if module_name == f".{attr_name}":
+            return module
+        else:
+            return getattr(module, attr_name)
+    except ImportError as e:
+        raise ImportError(
+            f"Failed to import {attr_name} from {module_name}: {e}"
+        ) from e
+    except AttributeError as e:
+        raise AttributeError(
+            f"Failed to get {attr_name} from {module_name}: {e}"
+        ) from e
+
+
+def __dir__():
+    lazy_attrs = list(_dynamic_imports.keys())
+    return sorted(lazy_attrs)
+
+
+__all__ = ["CreateEvaluatorRequest", "Evaluator", "Evaluators"]  # same names as the keys of _dynamic_imports
diff --git a/langfuse/api/unstable/evaluators/types/create_evaluator_request.py b/langfuse/api/unstable/evaluators/types/create_evaluator_request.py
new file mode 100644
index 000000000..7616d99ee
--- /dev/null
+++ b/langfuse/api/unstable/evaluators/types/create_evaluator_request.py
@@ -0,0 +1,50 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+import typing_extensions
+from ....core.pydantic_utilities import UniversalBaseModel
+from ....core.serialization import FieldMetadata
+from ...commons.types.evaluator_model_config import EvaluatorModelConfig
+from ...commons.types.evaluator_output_definition import EvaluatorOutputDefinition
+
+
+class CreateEvaluatorRequest(UniversalBaseModel):
+    """
+    Request body for creating an evaluator.
+
+    If the same `name` already exists in your project, Langfuse creates the next version and returns it.
+    Existing evaluation rules in the same project are then moved to that new latest version automatically.
+    """
+
+    name: str = pydantic.Field()  # no default given, so the field is required
+    """
+    Evaluator name within the authenticated project.
+    """
+
+    prompt: str = pydantic.Field()  # no default given, so the field is required
+    """
+    Prompt template used by the evaluator.
+    """
+
+    output_definition: typing_extensions.Annotated[
+        EvaluatorOutputDefinition, FieldMetadata(alias="outputDefinition")
+    ] = pydantic.Field()
+    """
+    Structured output schema the evaluator must return.
+    
+    Always send `dataType`.
+    Do not send `version`; it is an internal storage detail and not part of the public request contract.
+    """
+
+    model_config_: typing_extensions.Annotated[  # trailing underscore: `model_config` is taken by the pydantic config below
+        typing.Optional[EvaluatorModelConfig], FieldMetadata(alias="modelConfig")
+    ] = pydantic.Field(default=None)
+    """
+    Optional explicit model configuration. Omit or set to `null` to use the project default evaluation model.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
+        extra="allow", frozen=True  # pydantic: keep unknown fields; instances are immutable
+    )
diff --git a/langfuse/api/unstable/evaluators/types/evaluator.py b/langfuse/api/unstable/evaluators/types/evaluator.py
new file mode 100644
index 000000000..8023839fc
--- /dev/null
+++ b/langfuse/api/unstable/evaluators/types/evaluator.py
@@ -0,0 +1,118 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+import pydantic
+import typing_extensions
+from ....core.pydantic_utilities import UniversalBaseModel
+from ....core.serialization import FieldMetadata
+from ...commons.types.evaluator_model_config import EvaluatorModelConfig
+from ...commons.types.evaluator_scope import EvaluatorScope
+from ...commons.types.evaluator_type import EvaluatorType
+from ...commons.types.public_evaluator_output_definition import (
+    PublicEvaluatorOutputDefinition,
+)
+
+
+class Evaluator(UniversalBaseModel):
+    """
+    One evaluator that can be used for scoring.
+
+    An evaluator describes **how** to score data:
+    - prompt
+    - extracted prompt variables
+    - output schema
+    - optional explicit model configuration
+
+    It does not define **which** live objects are evaluated. That is the job of `evaluation-rules`.
+
+    For agent clients, the most important fields are:
+    - `variables`: use these exact names when building the evaluation-rule `mapping` array
+    - `outputDefinition`: tells you the expected score type and the evaluator's response instructions
+    - `modelConfig`: tells you whether the evaluator uses the project default model (`null`) or an explicit provider/model
+
+    Versioning behavior:
+    - `GET /evaluators` returns the latest version of each available evaluator.
+    - `GET /evaluators/{id}` can return an older version.
+    - Evaluation rules always run against the latest version for the selected evaluator name within the same source (`project` or `managed`).
+    """
+
+    id: str = pydantic.Field()  # rebinds the builtin name `id` inside this class body only
+    """
+    Identifier of this evaluator.
+    """
+
+    name: str = pydantic.Field()
+    """
+    Evaluator name.
+    """
+
+    version: int = pydantic.Field()
+    """
+    Version number of this evaluator.
+    """
+
+    scope: EvaluatorScope = pydantic.Field()
+    """
+    Where this evaluator comes from: your project or Langfuse-managed defaults.
+    """
+
+    type: EvaluatorType = pydantic.Field()  # rebinds the builtin name `type` inside this class body only
+    """
+    Evaluator engine type. Currently always `llm_as_judge`.
+    """
+
+    prompt: str = pydantic.Field()
+    """
+    Prompt template used during evaluation.
+    """
+
+    variables: typing.List[str] = pydantic.Field()
+    """
+    Variables extracted from the evaluator prompt.
+    
+    Every variable in this list must be mapped exactly once when creating an evaluation rule.
+    """
+
+    output_definition: typing_extensions.Annotated[  # snake_case attribute; the camelCase alias is attached on the next line
+        PublicEvaluatorOutputDefinition, FieldMetadata(alias="outputDefinition")
+    ] = pydantic.Field()
+    """
+    Structured output schema returned by this evaluator.
+    
+    Responses always include `dataType` and omit the internal output-definition `version`.
+    Use `dataType` to decide how future scores should be interpreted.
+    """
+
+    model_config_: typing_extensions.Annotated[  # NOTE(review): trailing underscore presumably avoids clashing with pydantic's `model_config` below - confirm
+        typing.Optional[EvaluatorModelConfig], FieldMetadata(alias="modelConfig")
+    ] = pydantic.Field(default=None)  # the only field in this model with a default
+    """
+    Explicit model configuration, or `null` when the project default evaluation model is used.
+    """
+
+    evaluation_rule_count: typing_extensions.Annotated[
+        int, FieldMetadata(alias="evaluationRuleCount")
+    ] = pydantic.Field()
+    """
+    Number of evaluation rules in the project that currently use this evaluator version.
+    """
+
+    created_at: typing_extensions.Annotated[
+        dt.datetime, FieldMetadata(alias="createdAt")
+    ] = pydantic.Field()
+    """
+    Timestamp when this evaluator was created.
+    """
+
+    updated_at: typing_extensions.Annotated[
+        dt.datetime, FieldMetadata(alias="updatedAt")
+    ] = pydantic.Field()
+    """
+    Timestamp when this evaluator was last updated.
+    """
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(  # ClassVar: pydantic settings, not a response field
+        extra="allow", frozen=True  # unknown keys are kept on the instance; instances are immutable
+    )
diff --git a/langfuse/api/unstable/evaluators/types/evaluators.py b/langfuse/api/unstable/evaluators/types/evaluators.py
new file mode 100644
index 000000000..51247a66e
--- /dev/null
+++ b/langfuse/api/unstable/evaluators/types/evaluators.py
@@ -0,0 +1,17 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ....core.pydantic_utilities import UniversalBaseModel
+from ....utils.pagination.types.meta_response import MetaResponse
+from .evaluator import Evaluator
+
+
+class Evaluators(UniversalBaseModel):  # Envelope model: a list of Evaluator records plus a MetaResponse.
+    data: typing.List[Evaluator]  # required; no default is declared
+    meta: MetaResponse  # required; type is imported from utils.pagination, so presumably paging info - confirm
+
+    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(  # ClassVar: pydantic settings, not a data field
+        extra="allow", frozen=True  # unknown keys are kept on the instance; instances are immutable
+    )
diff --git a/langfuse/api/unstable/raw_client.py b/langfuse/api/unstable/raw_client.py
new file mode 100644
index 000000000..5201a5119
--- /dev/null
+++ b/langfuse/api/unstable/raw_client.py
@@ -0,0 +1,13 @@
+# This file was auto-generated by Fern from our API Definition.
+
+from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
+
+
+class RawUnstableClient:  # Holds a SyncClientWrapper; this class defines no request methods of its own.
+    def __init__(self, *, client_wrapper: SyncClientWrapper):  # `client_wrapper` is keyword-only
+        self._client_wrapper = client_wrapper  # stored as-is on a non-public attribute
+
+
+class AsyncRawUnstableClient:  # Holds an AsyncClientWrapper; this class defines no request methods of its own.
+    def __init__(self, *, client_wrapper: AsyncClientWrapper):  # `client_wrapper` is keyword-only
+        self._client_wrapper = client_wrapper  # stored as-is on a non-public attribute