From 4c5eb44fcfba3b3ab5d7fb3661b80e58bc64fcd1 Mon Sep 17 00:00:00 2001 From: langfuse-bot Date: Thu, 28 May 2026 13:56:18 +0000 Subject: [PATCH] feat(api): update API spec from langfuse/langfuse 41f5847 --- langfuse/api/__init__.py | 12 + .../api/blob_storage_integrations/__init__.py | 6 + .../api/blob_storage_integrations/client.py | 46 +- .../blob_storage_integrations/raw_client.py | 42 + .../types/__init__.py | 6 + .../types/blob_storage_export_field_group.py | 62 + .../types/blob_storage_export_frequency.py | 4 + .../types/blob_storage_export_source.py | 35 + .../blob_storage_integration_response.py | 13 + .../types/blob_storage_sync_status.py | 4 +- ...create_blob_storage_integration_request.py | 25 + langfuse/api/client.py | 19 + langfuse/api/commons/types/observation_v2.py | 47 +- langfuse/api/legacy/__init__.py | 4 +- langfuse/api/legacy/score_v1/__init__.py | 5 +- langfuse/api/legacy/score_v1/client.py | 11 + langfuse/api/legacy/score_v1/raw_client.py | 11 + .../api/legacy/score_v1/types/__init__.py | 4 +- .../score_v1/types/create_score_request.py | 6 + .../score_v1/types/create_score_source.py | 28 + langfuse/api/llm_connections/__init__.py | 3 + langfuse/api/llm_connections/client.py | 81 + langfuse/api/llm_connections/raw_client.py | 202 ++ .../api/llm_connections/types/__init__.py | 3 + .../types/delete_llm_connection_response.py | 14 + langfuse/api/observations/client.py | 38 +- langfuse/api/observations/raw_client.py | 38 +- langfuse/api/score_configs/client.py | 6 +- langfuse/api/score_configs/raw_client.py | 6 +- .../types/create_score_config_request.py | 6 +- .../types/update_score_config_request.py | 2 +- langfuse/api/scores/client.py | 4 +- langfuse/api/scores/raw_client.py | 4 +- langfuse/api/unstable/__init__.py | 267 ++ langfuse/api/unstable/client.py | 91 + langfuse/api/unstable/commons/__init__.py | 187 ++ .../api/unstable/commons/types/__init__.py | 211 ++ .../array_options_evaluation_rule_filter.py | 26 + 
.../types/boolean_evaluation_rule_filter.py | 21 + ...category_options_evaluation_rule_filter.py | 26 + .../types/date_time_evaluation_rule_filter.py | 29 + ...tion_rule_array_options_filter_operator.py | 26 + ...evaluation_rule_boolean_filter_operator.py | 22 + .../commons/types/evaluation_rule_filter.py | 740 ++++++ .../commons/types/evaluation_rule_mapping.py | 74 + .../types/evaluation_rule_mapping_source.py | 51 + .../evaluation_rule_null_filter_operator.py | 22 + .../evaluation_rule_number_filter_operator.py | 34 + ...evaluation_rule_options_filter_operator.py | 22 + .../commons/types/evaluation_rule_status.py | 34 + .../evaluation_rule_string_filter_operator.py | 34 + .../commons/types/evaluation_rule_target.py | 33 + .../commons/types/evaluator_model_config.py | 46 + .../types/evaluator_output_data_type.py | 35 + .../types/evaluator_output_definition.py | 161 ++ .../evaluator_output_field_definition.py | 17 + .../unstable/commons/types/evaluator_scope.py | 29 + .../unstable/commons/types/evaluator_type.py | 21 + .../types/null_evaluation_rule_filter.py | 24 + .../types/number_evaluation_rule_filter.py | 21 + .../number_object_evaluation_rule_filter.py | 26 + ...lic_boolean_evaluator_output_definition.py | 26 + ...categorical_evaluator_output_definition.py | 29 + ...rical_evaluator_output_score_definition.py | 20 + .../public_evaluator_output_definition.py | 167 ++ ...lic_numeric_evaluator_output_definition.py | 26 + .../types/string_evaluation_rule_filter.py | 21 + .../string_object_evaluation_rule_filter.py | 26 + .../string_options_evaluation_rule_filter.py | 24 + langfuse/api/unstable/errors/__init__.py | 84 + .../api/unstable/errors/errors/__init__.py | 68 + .../errors/errors/access_denied_error.py | 15 + .../errors/errors/bad_request_error.py | 15 + .../unstable/errors/errors/conflict_error.py | 15 + .../errors/errors/internal_server_error.py | 15 + .../errors/errors/method_not_allowed_error.py | 15 + .../unstable/errors/errors/not_found_error.py | 15 
+ .../errors/errors/too_many_requests_error.py | 15 + .../errors/errors/unauthorized_error.py | 15 + .../errors/unprocessable_content_error.py | 15 + .../api/unstable/errors/types/__init__.py | 53 + .../unstable/errors/types/public_api_error.py | 58 + .../errors/types/public_api_error_code.py | 93 + .../errors/types/public_api_error_details.py | 114 + .../types/public_api_validation_issue.py | 34 + .../api/unstable/evaluation_rules/__init__.py | 64 + .../api/unstable/evaluation_rules/client.py | 859 +++++++ .../unstable/evaluation_rules/raw_client.py | 2271 +++++++++++++++++ .../evaluation_rules/types/__init__.py | 62 + .../types/create_evaluation_rule_request.py | 75 + .../types/delete_evaluation_rule_response.py | 21 + .../evaluation_rules/types/evaluation_rule.py | 172 ++ .../types/evaluation_rule_evaluator.py | 35 + .../evaluation_rule_evaluator_reference.py | 29 + .../types/evaluation_rules.py | 28 + .../types/update_evaluation_rule_request.py | 74 + langfuse/api/unstable/evaluators/__init__.py | 44 + langfuse/api/unstable/evaluators/client.py | 458 ++++ .../api/unstable/evaluators/raw_client.py | 1278 ++++++++++ .../api/unstable/evaluators/types/__init__.py | 46 + .../types/create_evaluator_request.py | 50 + .../unstable/evaluators/types/evaluator.py | 118 + .../unstable/evaluators/types/evaluators.py | 17 + langfuse/api/unstable/raw_client.py | 13 + 104 files changed, 9784 insertions(+), 35 deletions(-) create mode 100644 langfuse/api/blob_storage_integrations/types/blob_storage_export_field_group.py create mode 100644 langfuse/api/blob_storage_integrations/types/blob_storage_export_source.py create mode 100644 langfuse/api/legacy/score_v1/types/create_score_source.py create mode 100644 langfuse/api/llm_connections/types/delete_llm_connection_response.py create mode 100644 langfuse/api/unstable/__init__.py create mode 100644 langfuse/api/unstable/client.py create mode 100644 langfuse/api/unstable/commons/__init__.py create mode 100644 
langfuse/api/unstable/commons/types/__init__.py create mode 100644 langfuse/api/unstable/commons/types/array_options_evaluation_rule_filter.py create mode 100644 langfuse/api/unstable/commons/types/boolean_evaluation_rule_filter.py create mode 100644 langfuse/api/unstable/commons/types/category_options_evaluation_rule_filter.py create mode 100644 langfuse/api/unstable/commons/types/date_time_evaluation_rule_filter.py create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_array_options_filter_operator.py create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_boolean_filter_operator.py create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_filter.py create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_mapping.py create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_mapping_source.py create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_null_filter_operator.py create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_number_filter_operator.py create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_options_filter_operator.py create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_status.py create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_string_filter_operator.py create mode 100644 langfuse/api/unstable/commons/types/evaluation_rule_target.py create mode 100644 langfuse/api/unstable/commons/types/evaluator_model_config.py create mode 100644 langfuse/api/unstable/commons/types/evaluator_output_data_type.py create mode 100644 langfuse/api/unstable/commons/types/evaluator_output_definition.py create mode 100644 langfuse/api/unstable/commons/types/evaluator_output_field_definition.py create mode 100644 langfuse/api/unstable/commons/types/evaluator_scope.py create mode 100644 langfuse/api/unstable/commons/types/evaluator_type.py create mode 100644 
langfuse/api/unstable/commons/types/null_evaluation_rule_filter.py create mode 100644 langfuse/api/unstable/commons/types/number_evaluation_rule_filter.py create mode 100644 langfuse/api/unstable/commons/types/number_object_evaluation_rule_filter.py create mode 100644 langfuse/api/unstable/commons/types/public_boolean_evaluator_output_definition.py create mode 100644 langfuse/api/unstable/commons/types/public_categorical_evaluator_output_definition.py create mode 100644 langfuse/api/unstable/commons/types/public_categorical_evaluator_output_score_definition.py create mode 100644 langfuse/api/unstable/commons/types/public_evaluator_output_definition.py create mode 100644 langfuse/api/unstable/commons/types/public_numeric_evaluator_output_definition.py create mode 100644 langfuse/api/unstable/commons/types/string_evaluation_rule_filter.py create mode 100644 langfuse/api/unstable/commons/types/string_object_evaluation_rule_filter.py create mode 100644 langfuse/api/unstable/commons/types/string_options_evaluation_rule_filter.py create mode 100644 langfuse/api/unstable/errors/__init__.py create mode 100644 langfuse/api/unstable/errors/errors/__init__.py create mode 100644 langfuse/api/unstable/errors/errors/access_denied_error.py create mode 100644 langfuse/api/unstable/errors/errors/bad_request_error.py create mode 100644 langfuse/api/unstable/errors/errors/conflict_error.py create mode 100644 langfuse/api/unstable/errors/errors/internal_server_error.py create mode 100644 langfuse/api/unstable/errors/errors/method_not_allowed_error.py create mode 100644 langfuse/api/unstable/errors/errors/not_found_error.py create mode 100644 langfuse/api/unstable/errors/errors/too_many_requests_error.py create mode 100644 langfuse/api/unstable/errors/errors/unauthorized_error.py create mode 100644 langfuse/api/unstable/errors/errors/unprocessable_content_error.py create mode 100644 langfuse/api/unstable/errors/types/__init__.py create mode 100644 
langfuse/api/unstable/errors/types/public_api_error.py create mode 100644 langfuse/api/unstable/errors/types/public_api_error_code.py create mode 100644 langfuse/api/unstable/errors/types/public_api_error_details.py create mode 100644 langfuse/api/unstable/errors/types/public_api_validation_issue.py create mode 100644 langfuse/api/unstable/evaluation_rules/__init__.py create mode 100644 langfuse/api/unstable/evaluation_rules/client.py create mode 100644 langfuse/api/unstable/evaluation_rules/raw_client.py create mode 100644 langfuse/api/unstable/evaluation_rules/types/__init__.py create mode 100644 langfuse/api/unstable/evaluation_rules/types/create_evaluation_rule_request.py create mode 100644 langfuse/api/unstable/evaluation_rules/types/delete_evaluation_rule_response.py create mode 100644 langfuse/api/unstable/evaluation_rules/types/evaluation_rule.py create mode 100644 langfuse/api/unstable/evaluation_rules/types/evaluation_rule_evaluator.py create mode 100644 langfuse/api/unstable/evaluation_rules/types/evaluation_rule_evaluator_reference.py create mode 100644 langfuse/api/unstable/evaluation_rules/types/evaluation_rules.py create mode 100644 langfuse/api/unstable/evaluation_rules/types/update_evaluation_rule_request.py create mode 100644 langfuse/api/unstable/evaluators/__init__.py create mode 100644 langfuse/api/unstable/evaluators/client.py create mode 100644 langfuse/api/unstable/evaluators/raw_client.py create mode 100644 langfuse/api/unstable/evaluators/types/__init__.py create mode 100644 langfuse/api/unstable/evaluators/types/create_evaluator_request.py create mode 100644 langfuse/api/unstable/evaluators/types/evaluator.py create mode 100644 langfuse/api/unstable/evaluators/types/evaluators.py create mode 100644 langfuse/api/unstable/raw_client.py diff --git a/langfuse/api/__init__.py b/langfuse/api/__init__.py index aa103cf12..0e036263a 100644 --- a/langfuse/api/__init__.py +++ b/langfuse/api/__init__.py @@ -32,6 +32,7 @@ scores, sessions, trace, + 
unstable, utils, ) from .annotation_queues import ( @@ -50,8 +51,10 @@ UpdateAnnotationQueueItemRequest, ) from .blob_storage_integrations import ( + BlobStorageExportFieldGroup, BlobStorageExportFrequency, BlobStorageExportMode, + BlobStorageExportSource, BlobStorageIntegrationDeletionResponse, BlobStorageIntegrationFileType, BlobStorageIntegrationResponse, @@ -186,6 +189,7 @@ UsageDetails, ) from .llm_connections import ( + DeleteLlmConnectionResponse, LlmAdapter, LlmConnection, PaginatedLlmConnections, @@ -312,8 +316,10 @@ "BasePrompt": ".prompts", "BaseScore": ".commons", "BaseScoreV1": ".commons", + "BlobStorageExportFieldGroup": ".blob_storage_integrations", "BlobStorageExportFrequency": ".blob_storage_integrations", "BlobStorageExportMode": ".blob_storage_integrations", + "BlobStorageExportSource": ".blob_storage_integrations", "BlobStorageIntegrationDeletionResponse": ".blob_storage_integrations", "BlobStorageIntegrationFileType": ".blob_storage_integrations", "BlobStorageIntegrationResponse": ".blob_storage_integrations", @@ -368,6 +374,7 @@ "DeleteAnnotationQueueItemResponse": ".annotation_queues", "DeleteDatasetItemResponse": ".dataset_items", "DeleteDatasetRunResponse": ".datasets", + "DeleteLlmConnectionResponse": ".llm_connections", "DeleteMembershipRequest": ".organizations", "DeleteTraceResponse": ".trace", "EmptyResponse": ".scim", @@ -557,6 +564,7 @@ "scores": ".scores", "sessions": ".sessions", "trace": ".trace", + "unstable": ".unstable", "utils": ".utils", } @@ -605,8 +613,10 @@ def __dir__(): "BasePrompt", "BaseScore", "BaseScoreV1", + "BlobStorageExportFieldGroup", "BlobStorageExportFrequency", "BlobStorageExportMode", + "BlobStorageExportSource", "BlobStorageIntegrationDeletionResponse", "BlobStorageIntegrationFileType", "BlobStorageIntegrationResponse", @@ -661,6 +671,7 @@ def __dir__(): "DeleteAnnotationQueueItemResponse", "DeleteDatasetItemResponse", "DeleteDatasetRunResponse", + "DeleteLlmConnectionResponse", "DeleteMembershipRequest", 
"DeleteTraceResponse", "EmptyResponse", @@ -850,5 +861,6 @@ def __dir__(): "scores", "sessions", "trace", + "unstable", "utils", ] diff --git a/langfuse/api/blob_storage_integrations/__init__.py b/langfuse/api/blob_storage_integrations/__init__.py index 266be2a6c..d92046ef2 100644 --- a/langfuse/api/blob_storage_integrations/__init__.py +++ b/langfuse/api/blob_storage_integrations/__init__.py @@ -7,8 +7,10 @@ if typing.TYPE_CHECKING: from .types import ( + BlobStorageExportFieldGroup, BlobStorageExportFrequency, BlobStorageExportMode, + BlobStorageExportSource, BlobStorageIntegrationDeletionResponse, BlobStorageIntegrationFileType, BlobStorageIntegrationResponse, @@ -19,8 +21,10 @@ CreateBlobStorageIntegrationRequest, ) _dynamic_imports: typing.Dict[str, str] = { + "BlobStorageExportFieldGroup": ".types", "BlobStorageExportFrequency": ".types", "BlobStorageExportMode": ".types", + "BlobStorageExportSource": ".types", "BlobStorageIntegrationDeletionResponse": ".types", "BlobStorageIntegrationFileType": ".types", "BlobStorageIntegrationResponse": ".types", @@ -60,8 +64,10 @@ def __dir__(): __all__ = [ + "BlobStorageExportFieldGroup", "BlobStorageExportFrequency", "BlobStorageExportMode", + "BlobStorageExportSource", "BlobStorageIntegrationDeletionResponse", "BlobStorageIntegrationFileType", "BlobStorageIntegrationResponse", diff --git a/langfuse/api/blob_storage_integrations/client.py b/langfuse/api/blob_storage_integrations/client.py index 21eeffde3..609e83fd3 100644 --- a/langfuse/api/blob_storage_integrations/client.py +++ b/langfuse/api/blob_storage_integrations/client.py @@ -9,8 +9,10 @@ AsyncRawBlobStorageIntegrationsClient, RawBlobStorageIntegrationsClient, ) +from .types.blob_storage_export_field_group import BlobStorageExportFieldGroup from .types.blob_storage_export_frequency import BlobStorageExportFrequency from .types.blob_storage_export_mode import BlobStorageExportMode +from .types.blob_storage_export_source import BlobStorageExportSource from 
.types.blob_storage_integration_deletion_response import ( BlobStorageIntegrationDeletionResponse, ) @@ -95,6 +97,10 @@ def upsert_blob_storage_integration( prefix: typing.Optional[str] = OMIT, export_start_date: typing.Optional[dt.datetime] = OMIT, compressed: typing.Optional[bool] = OMIT, + export_source: typing.Optional[BlobStorageExportSource] = OMIT, + export_field_groups: typing.Optional[ + typing.Sequence[BlobStorageExportFieldGroup] + ] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> BlobStorageIntegrationResponse: """ @@ -143,6 +149,20 @@ def upsert_blob_storage_integration( compressed : typing.Optional[bool] Enable gzip compression for exported files (.csv.gz, .json.gz, .jsonl.gz). Defaults to true. + export_source : typing.Optional[BlobStorageExportSource] + Data to export. When omitted on update, the existing value is preserved. When omitted on create: pre-cutoff Cloud projects and self-hosted deployments fall back to `LEGACY_TRACES_OBSERVATIONS`; post-cutoff Cloud projects (created on or after 2026-05-20) auto-default to `OBSERVATIONS_V2`. Required when `exportFieldGroups` is provided. + + **Cloud-only deprecation gate (effective 2026-05-20):** For projects created on or after 2026-05-20 on Langfuse Cloud, `LEGACY_TRACES_OBSERVATIONS` and `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS` are rejected with HTTP 400. Omitting `exportSource` on these projects silently defaults to `OBSERVATIONS_V2` rather than the schema column default. Use `OBSERVATIONS_V2` for all new integrations. Projects created before 2026-05-20 and self-hosted deployments are unaffected. + + export_field_groups : typing.Optional[typing.Sequence[BlobStorageExportFieldGroup]] + Field groups to include in each exported row. + + For exportSource `OBSERVATIONS_V2` or `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS`: must include `core` if provided. When omitted on create, the column default (all groups) applies. When omitted on update, the existing value is preserved. 
+ + For exportSource `LEGACY_TRACES_OBSERVATIONS`: this field must be omitted or null. Sending an array (including an empty array) returns 400, because that source uses a fixed column set and does not honor field groups. + + `exportFieldGroups` requires `exportSource` to be provided in the same request. + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -173,7 +193,7 @@ def upsert_blob_storage_integration( type=BlobStorageIntegrationType.S3, bucket_name="bucketName", region="region", - export_frequency=BlobStorageExportFrequency.HOURLY, + export_frequency=BlobStorageExportFrequency.EVERY20MINUTES, enabled=True, force_path_style=True, file_type=BlobStorageIntegrationFileType.JSON, @@ -196,6 +216,8 @@ def upsert_blob_storage_integration( prefix=prefix, export_start_date=export_start_date, compressed=compressed, + export_source=export_source, + export_field_groups=export_field_groups, request_options=request_options, ) return _response.data @@ -354,6 +376,10 @@ async def upsert_blob_storage_integration( prefix: typing.Optional[str] = OMIT, export_start_date: typing.Optional[dt.datetime] = OMIT, compressed: typing.Optional[bool] = OMIT, + export_source: typing.Optional[BlobStorageExportSource] = OMIT, + export_field_groups: typing.Optional[ + typing.Sequence[BlobStorageExportFieldGroup] + ] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> BlobStorageIntegrationResponse: """ @@ -402,6 +428,20 @@ async def upsert_blob_storage_integration( compressed : typing.Optional[bool] Enable gzip compression for exported files (.csv.gz, .json.gz, .jsonl.gz). Defaults to true. + export_source : typing.Optional[BlobStorageExportSource] + Data to export. When omitted on update, the existing value is preserved. When omitted on create: pre-cutoff Cloud projects and self-hosted deployments fall back to `LEGACY_TRACES_OBSERVATIONS`; post-cutoff Cloud projects (created on or after 2026-05-20) auto-default to `OBSERVATIONS_V2`. 
Required when `exportFieldGroups` is provided. + + **Cloud-only deprecation gate (effective 2026-05-20):** For projects created on or after 2026-05-20 on Langfuse Cloud, `LEGACY_TRACES_OBSERVATIONS` and `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS` are rejected with HTTP 400. Omitting `exportSource` on these projects silently defaults to `OBSERVATIONS_V2` rather than the schema column default. Use `OBSERVATIONS_V2` for all new integrations. Projects created before 2026-05-20 and self-hosted deployments are unaffected. + + export_field_groups : typing.Optional[typing.Sequence[BlobStorageExportFieldGroup]] + Field groups to include in each exported row. + + For exportSource `OBSERVATIONS_V2` or `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS`: must include `core` if provided. When omitted on create, the column default (all groups) applies. When omitted on update, the existing value is preserved. + + For exportSource `LEGACY_TRACES_OBSERVATIONS`: this field must be omitted or null. Sending an array (including an empty array) returns 400, because that source uses a fixed column set and does not honor field groups. + + `exportFieldGroups` requires `exportSource` to be provided in the same request. + request_options : typing.Optional[RequestOptions] Request-specific configuration. 
@@ -437,7 +477,7 @@ async def main() -> None: type=BlobStorageIntegrationType.S3, bucket_name="bucketName", region="region", - export_frequency=BlobStorageExportFrequency.HOURLY, + export_frequency=BlobStorageExportFrequency.EVERY20MINUTES, enabled=True, force_path_style=True, file_type=BlobStorageIntegrationFileType.JSON, @@ -463,6 +503,8 @@ async def main() -> None: prefix=prefix, export_start_date=export_start_date, compressed=compressed, + export_source=export_source, + export_field_groups=export_field_groups, request_options=request_options, ) return _response.data diff --git a/langfuse/api/blob_storage_integrations/raw_client.py b/langfuse/api/blob_storage_integrations/raw_client.py index 5833ea63e..09e036db6 100644 --- a/langfuse/api/blob_storage_integrations/raw_client.py +++ b/langfuse/api/blob_storage_integrations/raw_client.py @@ -15,8 +15,10 @@ from ..core.jsonable_encoder import jsonable_encoder from ..core.pydantic_utilities import parse_obj_as from ..core.request_options import RequestOptions +from .types.blob_storage_export_field_group import BlobStorageExportFieldGroup from .types.blob_storage_export_frequency import BlobStorageExportFrequency from .types.blob_storage_export_mode import BlobStorageExportMode +from .types.blob_storage_export_source import BlobStorageExportSource from .types.blob_storage_integration_deletion_response import ( BlobStorageIntegrationDeletionResponse, ) @@ -152,6 +154,10 @@ def upsert_blob_storage_integration( prefix: typing.Optional[str] = OMIT, export_start_date: typing.Optional[dt.datetime] = OMIT, compressed: typing.Optional[bool] = OMIT, + export_source: typing.Optional[BlobStorageExportSource] = OMIT, + export_field_groups: typing.Optional[ + typing.Sequence[BlobStorageExportFieldGroup] + ] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> HttpResponse[BlobStorageIntegrationResponse]: """ @@ -200,6 +206,20 @@ def upsert_blob_storage_integration( compressed : typing.Optional[bool] Enable gzip 
compression for exported files (.csv.gz, .json.gz, .jsonl.gz). Defaults to true. + export_source : typing.Optional[BlobStorageExportSource] + Data to export. When omitted on update, the existing value is preserved. When omitted on create: pre-cutoff Cloud projects and self-hosted deployments fall back to `LEGACY_TRACES_OBSERVATIONS`; post-cutoff Cloud projects (created on or after 2026-05-20) auto-default to `OBSERVATIONS_V2`. Required when `exportFieldGroups` is provided. + + **Cloud-only deprecation gate (effective 2026-05-20):** For projects created on or after 2026-05-20 on Langfuse Cloud, `LEGACY_TRACES_OBSERVATIONS` and `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS` are rejected with HTTP 400. Omitting `exportSource` on these projects silently defaults to `OBSERVATIONS_V2` rather than the schema column default. Use `OBSERVATIONS_V2` for all new integrations. Projects created before 2026-05-20 and self-hosted deployments are unaffected. + + export_field_groups : typing.Optional[typing.Sequence[BlobStorageExportFieldGroup]] + Field groups to include in each exported row. + + For exportSource `OBSERVATIONS_V2` or `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS`: must include `core` if provided. When omitted on create, the column default (all groups) applies. When omitted on update, the existing value is preserved. + + For exportSource `LEGACY_TRACES_OBSERVATIONS`: this field must be omitted or null. Sending an array (including an empty array) returns 400, because that source uses a fixed column set and does not honor field groups. + + `exportFieldGroups` requires `exportSource` to be provided in the same request. + request_options : typing.Optional[RequestOptions] Request-specific configuration. 
@@ -226,6 +246,8 @@ def upsert_blob_storage_integration( "exportMode": export_mode, "exportStartDate": export_start_date, "compressed": compressed, + "exportSource": export_source, + "exportFieldGroups": export_field_groups, }, request_options=request_options, omit=OMIT, @@ -629,6 +651,10 @@ async def upsert_blob_storage_integration( prefix: typing.Optional[str] = OMIT, export_start_date: typing.Optional[dt.datetime] = OMIT, compressed: typing.Optional[bool] = OMIT, + export_source: typing.Optional[BlobStorageExportSource] = OMIT, + export_field_groups: typing.Optional[ + typing.Sequence[BlobStorageExportFieldGroup] + ] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[BlobStorageIntegrationResponse]: """ @@ -677,6 +703,20 @@ async def upsert_blob_storage_integration( compressed : typing.Optional[bool] Enable gzip compression for exported files (.csv.gz, .json.gz, .jsonl.gz). Defaults to true. + export_source : typing.Optional[BlobStorageExportSource] + Data to export. When omitted on update, the existing value is preserved. When omitted on create: pre-cutoff Cloud projects and self-hosted deployments fall back to `LEGACY_TRACES_OBSERVATIONS`; post-cutoff Cloud projects (created on or after 2026-05-20) auto-default to `OBSERVATIONS_V2`. Required when `exportFieldGroups` is provided. + + **Cloud-only deprecation gate (effective 2026-05-20):** For projects created on or after 2026-05-20 on Langfuse Cloud, `LEGACY_TRACES_OBSERVATIONS` and `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS` are rejected with HTTP 400. Omitting `exportSource` on these projects silently defaults to `OBSERVATIONS_V2` rather than the schema column default. Use `OBSERVATIONS_V2` for all new integrations. Projects created before 2026-05-20 and self-hosted deployments are unaffected. + + export_field_groups : typing.Optional[typing.Sequence[BlobStorageExportFieldGroup]] + Field groups to include in each exported row. 
+ + For exportSource `OBSERVATIONS_V2` or `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS`: must include `core` if provided. When omitted on create, the column default (all groups) applies. When omitted on update, the existing value is preserved. + + For exportSource `LEGACY_TRACES_OBSERVATIONS`: this field must be omitted or null. Sending an array (including an empty array) returns 400, because that source uses a fixed column set and does not honor field groups. + + `exportFieldGroups` requires `exportSource` to be provided in the same request. + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -703,6 +743,8 @@ async def upsert_blob_storage_integration( "exportMode": export_mode, "exportStartDate": export_start_date, "compressed": compressed, + "exportSource": export_source, + "exportFieldGroups": export_field_groups, }, request_options=request_options, omit=OMIT, diff --git a/langfuse/api/blob_storage_integrations/types/__init__.py b/langfuse/api/blob_storage_integrations/types/__init__.py index e0fe3e9ff..3a2a0e1ec 100644 --- a/langfuse/api/blob_storage_integrations/types/__init__.py +++ b/langfuse/api/blob_storage_integrations/types/__init__.py @@ -6,8 +6,10 @@ from importlib import import_module if typing.TYPE_CHECKING: + from .blob_storage_export_field_group import BlobStorageExportFieldGroup from .blob_storage_export_frequency import BlobStorageExportFrequency from .blob_storage_export_mode import BlobStorageExportMode + from .blob_storage_export_source import BlobStorageExportSource from .blob_storage_integration_deletion_response import ( BlobStorageIntegrationDeletionResponse, ) @@ -23,8 +25,10 @@ CreateBlobStorageIntegrationRequest, ) _dynamic_imports: typing.Dict[str, str] = { + "BlobStorageExportFieldGroup": ".blob_storage_export_field_group", "BlobStorageExportFrequency": ".blob_storage_export_frequency", "BlobStorageExportMode": ".blob_storage_export_mode", + "BlobStorageExportSource": ".blob_storage_export_source", 
"BlobStorageIntegrationDeletionResponse": ".blob_storage_integration_deletion_response", "BlobStorageIntegrationFileType": ".blob_storage_integration_file_type", "BlobStorageIntegrationResponse": ".blob_storage_integration_response", @@ -64,8 +68,10 @@ def __dir__(): __all__ = [ + "BlobStorageExportFieldGroup", "BlobStorageExportFrequency", "BlobStorageExportMode", + "BlobStorageExportSource", "BlobStorageIntegrationDeletionResponse", "BlobStorageIntegrationFileType", "BlobStorageIntegrationResponse", diff --git a/langfuse/api/blob_storage_integrations/types/blob_storage_export_field_group.py b/langfuse/api/blob_storage_integrations/types/blob_storage_export_field_group.py new file mode 100644 index 000000000..c21a9c3bb --- /dev/null +++ b/langfuse/api/blob_storage_integrations/types/blob_storage_export_field_group.py @@ -0,0 +1,62 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +from ...core import enum + +T_Result = typing.TypeVar("T_Result") + + +class BlobStorageExportFieldGroup(enum.StrEnum): + """ + Field group for the OBSERVATIONS_V2 and LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS export. 
+ """ + + CORE = "core" + BASIC = "basic" + TIME = "time" + IO = "io" + METADATA = "metadata" + MODEL = "model" + USAGE = "usage" + PROMPT = "prompt" + METRICS = "metrics" + TOOLS = "tools" + TRACE_CONTEXT = "trace_context" + + def visit( + self, + core: typing.Callable[[], T_Result], + basic: typing.Callable[[], T_Result], + time: typing.Callable[[], T_Result], + io: typing.Callable[[], T_Result], + metadata: typing.Callable[[], T_Result], + model: typing.Callable[[], T_Result], + usage: typing.Callable[[], T_Result], + prompt: typing.Callable[[], T_Result], + metrics: typing.Callable[[], T_Result], + tools: typing.Callable[[], T_Result], + trace_context: typing.Callable[[], T_Result], + ) -> T_Result: + if self is BlobStorageExportFieldGroup.CORE: + return core() + if self is BlobStorageExportFieldGroup.BASIC: + return basic() + if self is BlobStorageExportFieldGroup.TIME: + return time() + if self is BlobStorageExportFieldGroup.IO: + return io() + if self is BlobStorageExportFieldGroup.METADATA: + return metadata() + if self is BlobStorageExportFieldGroup.MODEL: + return model() + if self is BlobStorageExportFieldGroup.USAGE: + return usage() + if self is BlobStorageExportFieldGroup.PROMPT: + return prompt() + if self is BlobStorageExportFieldGroup.METRICS: + return metrics() + if self is BlobStorageExportFieldGroup.TOOLS: + return tools() + if self is BlobStorageExportFieldGroup.TRACE_CONTEXT: + return trace_context() diff --git a/langfuse/api/blob_storage_integrations/types/blob_storage_export_frequency.py b/langfuse/api/blob_storage_integrations/types/blob_storage_export_frequency.py index bcc7fc6d5..4799ecefb 100644 --- a/langfuse/api/blob_storage_integrations/types/blob_storage_export_frequency.py +++ b/langfuse/api/blob_storage_integrations/types/blob_storage_export_frequency.py @@ -8,16 +8,20 @@ class BlobStorageExportFrequency(enum.StrEnum): + EVERY20MINUTES = "every_20_minutes" HOURLY = "hourly" DAILY = "daily" WEEKLY = "weekly" def visit( self, + 
every20minutes: typing.Callable[[], T_Result], hourly: typing.Callable[[], T_Result], daily: typing.Callable[[], T_Result], weekly: typing.Callable[[], T_Result], ) -> T_Result: + if self is BlobStorageExportFrequency.EVERY20MINUTES: + return every20minutes() if self is BlobStorageExportFrequency.HOURLY: return hourly() if self is BlobStorageExportFrequency.DAILY: diff --git a/langfuse/api/blob_storage_integrations/types/blob_storage_export_source.py b/langfuse/api/blob_storage_integrations/types/blob_storage_export_source.py new file mode 100644 index 000000000..1451473b4 --- /dev/null +++ b/langfuse/api/blob_storage_integrations/types/blob_storage_export_source.py @@ -0,0 +1,35 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +from ...core import enum + +T_Result = typing.TypeVar("T_Result") + + +class BlobStorageExportSource(enum.StrEnum): + """ + What data the integration exports. + - `LEGACY_TRACES_OBSERVATIONS`: traces, observations, and scores tables with a fixed column set. The `exportFieldGroups` field is not applicable. + - `OBSERVATIONS_V2`: same data model as the `/api/public/v2/observations` endpoint, plus scores. Columns are controlled by `exportFieldGroups`. + - `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS`: both sets. For the `OBSERVATIONS_V2` portion, columns are controlled by `exportFieldGroups`. + + **Note:** `OBSERVATIONS_V2` and the enriched-observations portion of `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS` rely on the enriched observations table (Langfuse Fast Preview / v4), which is currently available on Langfuse Cloud only. See https://langfuse.com/docs/v4. 
+ """ + + LEGACY_TRACES_OBSERVATIONS = "LEGACY_TRACES_OBSERVATIONS" + OBSERVATIONS_V2 = "OBSERVATIONS_V2" + LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS = "LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS" + + def visit( + self, + legacy_traces_observations: typing.Callable[[], T_Result], + observations_v2: typing.Callable[[], T_Result], + legacy_traces_and_enriched_observations: typing.Callable[[], T_Result], + ) -> T_Result: + if self is BlobStorageExportSource.LEGACY_TRACES_OBSERVATIONS: + return legacy_traces_observations() + if self is BlobStorageExportSource.OBSERVATIONS_V2: + return observations_v2() + if self is BlobStorageExportSource.LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS: + return legacy_traces_and_enriched_observations() diff --git a/langfuse/api/blob_storage_integrations/types/blob_storage_integration_response.py b/langfuse/api/blob_storage_integrations/types/blob_storage_integration_response.py index b3630297b..e2b5921a0 100644 --- a/langfuse/api/blob_storage_integrations/types/blob_storage_integration_response.py +++ b/langfuse/api/blob_storage_integrations/types/blob_storage_integration_response.py @@ -7,8 +7,10 @@ import typing_extensions from ...core.pydantic_utilities import UniversalBaseModel from ...core.serialization import FieldMetadata +from .blob_storage_export_field_group import BlobStorageExportFieldGroup from .blob_storage_export_frequency import BlobStorageExportFrequency from .blob_storage_export_mode import BlobStorageExportMode +from .blob_storage_export_source import BlobStorageExportSource from .blob_storage_integration_file_type import BlobStorageIntegrationFileType from .blob_storage_integration_type import BlobStorageIntegrationType @@ -41,6 +43,17 @@ class BlobStorageIntegrationResponse(UniversalBaseModel): typing.Optional[dt.datetime], FieldMetadata(alias="exportStartDate") ] = None compressed: bool + export_source: typing_extensions.Annotated[ + BlobStorageExportSource, FieldMetadata(alias="exportSource") + ] + export_field_groups: 
typing_extensions.Annotated[ + typing.Optional[typing.List[BlobStorageExportFieldGroup]], + FieldMetadata(alias="exportFieldGroups"), + ] = pydantic.Field(default=None) + """ + Field groups included in each exported row for `OBSERVATIONS_V2` / `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS` sources. Always `null` when exportSource is `LEGACY_TRACES_OBSERVATIONS` (the field does not apply to that source; any legacy DB value is hidden from the public surface). + """ + next_sync_at: typing_extensions.Annotated[ typing.Optional[dt.datetime], FieldMetadata(alias="nextSyncAt") ] = None diff --git a/langfuse/api/blob_storage_integrations/types/blob_storage_sync_status.py b/langfuse/api/blob_storage_integrations/types/blob_storage_sync_status.py index 254e06645..559e41450 100644 --- a/langfuse/api/blob_storage_integrations/types/blob_storage_sync_status.py +++ b/langfuse/api/blob_storage_integrations/types/blob_storage_sync_status.py @@ -17,8 +17,8 @@ class BlobStorageSyncStatus(enum.StrEnum): - `up_to_date` — all available data has been exported; next export is scheduled for the future **ETL usage**: poll this endpoint and check for `up_to_date` status. Compare `lastSyncAt` against your - ETL bookmark to determine if new data is available. Note that exports run with a 30-minute lag buffer, - so `lastSyncAt` will always be at least 30 minutes behind real-time. + ETL bookmark to determine if new data is available. Note that exports run with a 20-minute lag buffer, + so `lastSyncAt` will always be at least 20 minutes behind real-time. 
""" IDLE = "idle" diff --git a/langfuse/api/blob_storage_integrations/types/create_blob_storage_integration_request.py b/langfuse/api/blob_storage_integrations/types/create_blob_storage_integration_request.py index ada6e432b..89c9bca4a 100644 --- a/langfuse/api/blob_storage_integrations/types/create_blob_storage_integration_request.py +++ b/langfuse/api/blob_storage_integrations/types/create_blob_storage_integration_request.py @@ -7,8 +7,10 @@ import typing_extensions from ...core.pydantic_utilities import UniversalBaseModel from ...core.serialization import FieldMetadata +from .blob_storage_export_field_group import BlobStorageExportFieldGroup from .blob_storage_export_frequency import BlobStorageExportFrequency from .blob_storage_export_mode import BlobStorageExportMode +from .blob_storage_export_source import BlobStorageExportSource from .blob_storage_integration_file_type import BlobStorageIntegrationFileType from .blob_storage_integration_type import BlobStorageIntegrationType @@ -91,6 +93,29 @@ class CreateBlobStorageIntegrationRequest(UniversalBaseModel): Enable gzip compression for exported files (.csv.gz, .json.gz, .jsonl.gz). Defaults to true. """ + export_source: typing_extensions.Annotated[ + typing.Optional[BlobStorageExportSource], FieldMetadata(alias="exportSource") + ] = pydantic.Field(default=None) + """ + Data to export. When omitted on update, the existing value is preserved. When omitted on create: pre-cutoff Cloud projects and self-hosted deployments fall back to `LEGACY_TRACES_OBSERVATIONS`; post-cutoff Cloud projects (created on or after 2026-05-20) auto-default to `OBSERVATIONS_V2`. Required when `exportFieldGroups` is provided. + + **Cloud-only deprecation gate (effective 2026-05-20):** For projects created on or after 2026-05-20 on Langfuse Cloud, `LEGACY_TRACES_OBSERVATIONS` and `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS` are rejected with HTTP 400. 
Omitting `exportSource` on these projects silently defaults to `OBSERVATIONS_V2` rather than the schema column default. Use `OBSERVATIONS_V2` for all new integrations. Projects created before 2026-05-20 and self-hosted deployments are unaffected. + """ + + export_field_groups: typing_extensions.Annotated[ + typing.Optional[typing.List[BlobStorageExportFieldGroup]], + FieldMetadata(alias="exportFieldGroups"), + ] = pydantic.Field(default=None) + """ + Field groups to include in each exported row. + + For exportSource `OBSERVATIONS_V2` or `LEGACY_TRACES_AND_ENRICHED_OBSERVATIONS`: must include `core` if provided. When omitted on create, the column default (all groups) applies. When omitted on update, the existing value is preserved. + + For exportSource `LEGACY_TRACES_OBSERVATIONS`: this field must be omitted or null. Sending an array (including an empty array) returns 400, because that source uses a fixed column set and does not honor field groups. + + `exportFieldGroups` requires `exportSource` to be provided in the same request. 
+ """ + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( extra="allow", frozen=True ) diff --git a/langfuse/api/client.py b/langfuse/api/client.py index 3f656cdcd..c0413704b 100644 --- a/langfuse/api/client.py +++ b/langfuse/api/client.py @@ -41,6 +41,7 @@ from .scores.client import AsyncScoresClient, ScoresClient from .sessions.client import AsyncSessionsClient, SessionsClient from .trace.client import AsyncTraceClient, TraceClient + from .unstable.client import AsyncUnstableClient, UnstableClient class LangfuseAPI: @@ -147,6 +148,7 @@ def __init__( self._scores: typing.Optional[ScoresClient] = None self._sessions: typing.Optional[SessionsClient] = None self._trace: typing.Optional[TraceClient] = None + self._unstable: typing.Optional[UnstableClient] = None @property def annotation_queues(self): @@ -358,6 +360,14 @@ def trace(self): self._trace = TraceClient(client_wrapper=self._client_wrapper) return self._trace + @property + def unstable(self): + if self._unstable is None: + from .unstable.client import UnstableClient # noqa: E402 + + self._unstable = UnstableClient(client_wrapper=self._client_wrapper) + return self._unstable + class AsyncLangfuseAPI: """ @@ -463,6 +473,7 @@ def __init__( self._scores: typing.Optional[AsyncScoresClient] = None self._sessions: typing.Optional[AsyncSessionsClient] = None self._trace: typing.Optional[AsyncTraceClient] = None + self._unstable: typing.Optional[AsyncUnstableClient] = None @property def annotation_queues(self): @@ -677,3 +688,11 @@ def trace(self): self._trace = AsyncTraceClient(client_wrapper=self._client_wrapper) return self._trace + + @property + def unstable(self): + if self._unstable is None: + from .unstable.client import AsyncUnstableClient # noqa: E402 + + self._unstable = AsyncUnstableClient(client_wrapper=self._client_wrapper) + return self._unstable diff --git a/langfuse/api/commons/types/observation_v2.py b/langfuse/api/commons/types/observation_v2.py index 149dfb422..08c1604cf 
100644 --- a/langfuse/api/commons/types/observation_v2.py +++ b/langfuse/api/commons/types/observation_v2.py @@ -190,6 +190,13 @@ class ObservationV2(UniversalBaseModel): The total cost of the observation in USD """ + usage_pricing_tier_name: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="usagePricingTierName") + ] = pydantic.Field(default=None) + """ + The name of the pricing tier applied to this observation's usage costs + """ + prompt_id: typing_extensions.Annotated[ typing.Optional[str], FieldMetadata(alias="promptId") ] = pydantic.Field(default=None) @@ -227,7 +234,45 @@ class ObservationV2(UniversalBaseModel): typing.Optional[str], FieldMetadata(alias="modelId") ] = pydantic.Field(default=None) """ - The matched model ID + The matched model ID. Null when the `model` field group is not requested. + """ + + input_price: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="inputPrice") + ] = pydantic.Field(default=None) + """ + The input token price (USD per unit) from the matched model, serialized as a decimal string (e.g. "0.0001"). Null when the `model` field group is not requested. + """ + + output_price: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="outputPrice") + ] = pydantic.Field(default=None) + """ + The output token price (USD per unit) from the matched model, serialized as a decimal string (e.g. "0.0001"). Null when the `model` field group is not requested. + """ + + total_price: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="totalPrice") + ] = pydantic.Field(default=None) + """ + The total token price (USD per unit) from the matched model, serialized as a decimal string (e.g. "0.0001"). Null when the `model` field group is not requested. 
+ """ + + trace_name: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="traceName") + ] = pydantic.Field(default=None) + """ + The name of the parent trace + """ + + tags: typing.Optional[typing.List[str]] = pydantic.Field(default=None) + """ + Tags from the parent trace (denormalized onto the observation) + """ + + release: typing.Optional[str] = pydantic.Field(default=None) + """ + The release version of the parent trace """ model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( diff --git a/langfuse/api/legacy/__init__.py b/langfuse/api/legacy/__init__.py index d91b42c2b..0a67d1c0c 100644 --- a/langfuse/api/legacy/__init__.py +++ b/langfuse/api/legacy/__init__.py @@ -9,10 +9,11 @@ from . import metrics_v1, observations_v1, score_v1 from .metrics_v1 import MetricsResponse from .observations_v1 import Observations, ObservationsViews - from .score_v1 import CreateScoreRequest, CreateScoreResponse + from .score_v1 import CreateScoreRequest, CreateScoreResponse, CreateScoreSource _dynamic_imports: typing.Dict[str, str] = { "CreateScoreRequest": ".score_v1", "CreateScoreResponse": ".score_v1", + "CreateScoreSource": ".score_v1", "MetricsResponse": ".metrics_v1", "Observations": ".observations_v1", "ObservationsViews": ".observations_v1", @@ -52,6 +53,7 @@ def __dir__(): __all__ = [ "CreateScoreRequest", "CreateScoreResponse", + "CreateScoreSource", "MetricsResponse", "Observations", "ObservationsViews", diff --git a/langfuse/api/legacy/score_v1/__init__.py b/langfuse/api/legacy/score_v1/__init__.py index 3d0c7422a..4841a9656 100644 --- a/langfuse/api/legacy/score_v1/__init__.py +++ b/langfuse/api/legacy/score_v1/__init__.py @@ -6,10 +6,11 @@ from importlib import import_module if typing.TYPE_CHECKING: - from .types import CreateScoreRequest, CreateScoreResponse + from .types import CreateScoreRequest, CreateScoreResponse, CreateScoreSource _dynamic_imports: typing.Dict[str, str] = { "CreateScoreRequest": ".types", 
"CreateScoreResponse": ".types", + "CreateScoreSource": ".types", } @@ -40,4 +41,4 @@ def __dir__(): return sorted(lazy_attrs) -__all__ = ["CreateScoreRequest", "CreateScoreResponse"] +__all__ = ["CreateScoreRequest", "CreateScoreResponse", "CreateScoreSource"] diff --git a/langfuse/api/legacy/score_v1/client.py b/langfuse/api/legacy/score_v1/client.py index 03ca8b836..60f118747 100644 --- a/langfuse/api/legacy/score_v1/client.py +++ b/langfuse/api/legacy/score_v1/client.py @@ -8,6 +8,7 @@ from ...core.request_options import RequestOptions from .raw_client import AsyncRawScoreV1Client, RawScoreV1Client from .types.create_score_response import CreateScoreResponse +from .types.create_score_source import CreateScoreSource # this is used as the default value for optional parameters OMIT = typing.cast(typing.Any, ...) @@ -44,6 +45,7 @@ def create( queue_id: typing.Optional[str] = OMIT, data_type: typing.Optional[ScoreDataType] = OMIT, config_id: typing.Optional[str] = OMIT, + source: typing.Optional[CreateScoreSource] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> CreateScoreResponse: """ @@ -82,6 +84,9 @@ def create( config_id : typing.Optional[str] Reference a score config on a score. The unique langfuse identifier of a score config. When passing this field, the dataType and stringValue fields are automatically populated. + source : typing.Optional[CreateScoreSource] + The source of the score. Defaults to API. Set to ANNOTATION to prefill scores (e.g. from an LLM) for a human reviewer to verify in an annotation queue. When source is ANNOTATION, a configId is required unless dataType is CORRECTION. EVAL is reserved for internal evaluator outputs and is not accepted on this endpoint. + request_options : typing.Optional[RequestOptions] Request-specific configuration. 
@@ -120,6 +125,7 @@ def create( queue_id=queue_id, data_type=data_type, config_id=config_id, + source=source, request_options=request_options, ) return _response.data @@ -193,6 +199,7 @@ async def create( queue_id: typing.Optional[str] = OMIT, data_type: typing.Optional[ScoreDataType] = OMIT, config_id: typing.Optional[str] = OMIT, + source: typing.Optional[CreateScoreSource] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> CreateScoreResponse: """ @@ -231,6 +238,9 @@ async def create( config_id : typing.Optional[str] Reference a score config on a score. The unique langfuse identifier of a score config. When passing this field, the dataType and stringValue fields are automatically populated. + source : typing.Optional[CreateScoreSource] + The source of the score. Defaults to API. Set to ANNOTATION to prefill scores (e.g. from an LLM) for a human reviewer to verify in an annotation queue. When source is ANNOTATION, a configId is required unless dataType is CORRECTION. EVAL is reserved for internal evaluator outputs and is not accepted on this endpoint. + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -277,6 +287,7 @@ async def main() -> None: queue_id=queue_id, data_type=data_type, config_id=config_id, + source=source, request_options=request_options, ) return _response.data diff --git a/langfuse/api/legacy/score_v1/raw_client.py b/langfuse/api/legacy/score_v1/raw_client.py index 834560ec9..3dc0164e0 100644 --- a/langfuse/api/legacy/score_v1/raw_client.py +++ b/langfuse/api/legacy/score_v1/raw_client.py @@ -18,6 +18,7 @@ from ...core.request_options import RequestOptions from ...core.serialization import convert_and_respect_annotation_metadata from .types.create_score_response import CreateScoreResponse +from .types.create_score_source import CreateScoreSource # this is used as the default value for optional parameters OMIT = typing.cast(typing.Any, ...) 
@@ -43,6 +44,7 @@ def create( queue_id: typing.Optional[str] = OMIT, data_type: typing.Optional[ScoreDataType] = OMIT, config_id: typing.Optional[str] = OMIT, + source: typing.Optional[CreateScoreSource] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> HttpResponse[CreateScoreResponse]: """ @@ -81,6 +83,9 @@ def create( config_id : typing.Optional[str] Reference a score config on a score. The unique langfuse identifier of a score config. When passing this field, the dataType and stringValue fields are automatically populated. + source : typing.Optional[CreateScoreSource] + The source of the score. Defaults to API. Set to ANNOTATION to prefill scores (e.g. from an LLM) for a human reviewer to verify in an annotation queue. When source is ANNOTATION, a configId is required unless dataType is CORRECTION. EVAL is reserved for internal evaluator outputs and is not accepted on this endpoint. + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -107,6 +112,7 @@ def create( "queueId": queue_id, "dataType": data_type, "configId": config_id, + "source": source, }, request_options=request_options, omit=OMIT, @@ -304,6 +310,7 @@ async def create( queue_id: typing.Optional[str] = OMIT, data_type: typing.Optional[ScoreDataType] = OMIT, config_id: typing.Optional[str] = OMIT, + source: typing.Optional[CreateScoreSource] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[CreateScoreResponse]: """ @@ -342,6 +349,9 @@ async def create( config_id : typing.Optional[str] Reference a score config on a score. The unique langfuse identifier of a score config. When passing this field, the dataType and stringValue fields are automatically populated. + source : typing.Optional[CreateScoreSource] + The source of the score. Defaults to API. Set to ANNOTATION to prefill scores (e.g. from an LLM) for a human reviewer to verify in an annotation queue. 
When source is ANNOTATION, a configId is required unless dataType is CORRECTION. EVAL is reserved for internal evaluator outputs and is not accepted on this endpoint. + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -368,6 +378,7 @@ async def create( "queueId": queue_id, "dataType": data_type, "configId": config_id, + "source": source, }, request_options=request_options, omit=OMIT, diff --git a/langfuse/api/legacy/score_v1/types/__init__.py b/langfuse/api/legacy/score_v1/types/__init__.py index 4a759a978..dde25cbb4 100644 --- a/langfuse/api/legacy/score_v1/types/__init__.py +++ b/langfuse/api/legacy/score_v1/types/__init__.py @@ -8,9 +8,11 @@ if typing.TYPE_CHECKING: from .create_score_request import CreateScoreRequest from .create_score_response import CreateScoreResponse + from .create_score_source import CreateScoreSource _dynamic_imports: typing.Dict[str, str] = { "CreateScoreRequest": ".create_score_request", "CreateScoreResponse": ".create_score_response", + "CreateScoreSource": ".create_score_source", } @@ -41,4 +43,4 @@ def __dir__(): return sorted(lazy_attrs) -__all__ = ["CreateScoreRequest", "CreateScoreResponse"] +__all__ = ["CreateScoreRequest", "CreateScoreResponse", "CreateScoreSource"] diff --git a/langfuse/api/legacy/score_v1/types/create_score_request.py b/langfuse/api/legacy/score_v1/types/create_score_request.py index a0397bdfc..ef498fe6c 100644 --- a/langfuse/api/legacy/score_v1/types/create_score_request.py +++ b/langfuse/api/legacy/score_v1/types/create_score_request.py @@ -8,6 +8,7 @@ from ....commons.types.score_data_type import ScoreDataType from ....core.pydantic_utilities import UniversalBaseModel from ....core.serialization import FieldMetadata +from .create_score_source import CreateScoreSource class CreateScoreRequest(UniversalBaseModel): @@ -70,6 +71,11 @@ class CreateScoreRequest(UniversalBaseModel): Reference a score config on a score. The unique langfuse identifier of a score config. 
When passing this field, the dataType and stringValue fields are automatically populated. """ + source: typing.Optional[CreateScoreSource] = pydantic.Field(default=None) + """ + The source of the score. Defaults to API. Set to ANNOTATION to prefill scores (e.g. from an LLM) for a human reviewer to verify in an annotation queue. When source is ANNOTATION, a configId is required unless dataType is CORRECTION. EVAL is reserved for internal evaluator outputs and is not accepted on this endpoint. + """ + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( extra="allow", frozen=True ) diff --git a/langfuse/api/legacy/score_v1/types/create_score_source.py b/langfuse/api/legacy/score_v1/types/create_score_source.py new file mode 100644 index 000000000..7364efd61 --- /dev/null +++ b/langfuse/api/legacy/score_v1/types/create_score_source.py @@ -0,0 +1,28 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +from ....core import enum + +T_Result = typing.TypeVar("T_Result") + + +class CreateScoreSource(enum.StrEnum): + """ + Source values accepted when creating a score via the public REST API. + EVAL is reserved for internal evaluator outputs and is intentionally not + exposed here — use commons.ScoreSource when reading scores. 
+ """ + + API = "API" + ANNOTATION = "ANNOTATION" + + def visit( + self, + api: typing.Callable[[], T_Result], + annotation: typing.Callable[[], T_Result], + ) -> T_Result: + if self is CreateScoreSource.API: + return api() + if self is CreateScoreSource.ANNOTATION: + return annotation() diff --git a/langfuse/api/llm_connections/__init__.py b/langfuse/api/llm_connections/__init__.py index aba7157f1..e4edb011c 100644 --- a/langfuse/api/llm_connections/__init__.py +++ b/langfuse/api/llm_connections/__init__.py @@ -7,12 +7,14 @@ if typing.TYPE_CHECKING: from .types import ( + DeleteLlmConnectionResponse, LlmAdapter, LlmConnection, PaginatedLlmConnections, UpsertLlmConnectionRequest, ) _dynamic_imports: typing.Dict[str, str] = { + "DeleteLlmConnectionResponse": ".types", "LlmAdapter": ".types", "LlmConnection": ".types", "PaginatedLlmConnections": ".types", @@ -48,6 +50,7 @@ def __dir__(): __all__ = [ + "DeleteLlmConnectionResponse", "LlmAdapter", "LlmConnection", "PaginatedLlmConnections", diff --git a/langfuse/api/llm_connections/client.py b/langfuse/api/llm_connections/client.py index 213e55e9f..62c4293ff 100644 --- a/langfuse/api/llm_connections/client.py +++ b/langfuse/api/llm_connections/client.py @@ -5,6 +5,7 @@ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper from ..core.request_options import RequestOptions from .raw_client import AsyncRawLlmConnectionsClient, RawLlmConnectionsClient +from .types.delete_llm_connection_response import DeleteLlmConnectionResponse from .types.llm_adapter import LlmAdapter from .types.llm_connection import LlmConnection from .types.paginated_llm_connections import PaginatedLlmConnections @@ -153,6 +154,42 @@ def upsert( ) return _response.data + def delete( + self, id: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> DeleteLlmConnectionResponse: + """ + Delete an LLM connection by id. Evaluators that depend on the deleted connection are automatically paused. 
+ + Parameters + ---------- + id : str + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + DeleteLlmConnectionResponse + + Examples + -------- + from langfuse import LangfuseAPI + + client = LangfuseAPI( + x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME", + x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION", + x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY", + username="YOUR_USERNAME", + password="YOUR_PASSWORD", + base_url="https://yourhost.com/path/to/api", + ) + client.llm_connections.delete( + id="id", + ) + """ + _response = self._raw_client.delete(id, request_options=request_options) + return _response.data + class AsyncLlmConnectionsClient: def __init__(self, *, client_wrapper: AsyncClientWrapper): @@ -309,3 +346,47 @@ async def main() -> None: request_options=request_options, ) return _response.data + + async def delete( + self, id: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> DeleteLlmConnectionResponse: + """ + Delete an LLM connection by id. Evaluators that depend on the deleted connection are automatically paused. + + Parameters + ---------- + id : str + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + DeleteLlmConnectionResponse + + Examples + -------- + import asyncio + + from langfuse import AsyncLangfuseAPI + + client = AsyncLangfuseAPI( + x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME", + x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION", + x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY", + username="YOUR_USERNAME", + password="YOUR_PASSWORD", + base_url="https://yourhost.com/path/to/api", + ) + + + async def main() -> None: + await client.llm_connections.delete( + id="id", + ) + + + asyncio.run(main()) + """ + _response = await self._raw_client.delete(id, request_options=request_options) + return _response.data diff --git a/langfuse/api/llm_connections/raw_client.py b/langfuse/api/llm_connections/raw_client.py index ef4f87425..30f7beebb 100644 --- a/langfuse/api/llm_connections/raw_client.py +++ b/langfuse/api/llm_connections/raw_client.py @@ -11,8 +11,10 @@ from ..core.api_error import ApiError from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper from ..core.http_response import AsyncHttpResponse, HttpResponse +from ..core.jsonable_encoder import jsonable_encoder from ..core.pydantic_utilities import parse_obj_as from ..core.request_options import RequestOptions +from .types.delete_llm_connection_response import DeleteLlmConnectionResponse from .types.llm_adapter import LlmAdapter from .types.llm_connection import LlmConnection from .types.paginated_llm_connections import PaginatedLlmConnections @@ -280,6 +282,106 @@ def upsert( body=_response_json, ) + def delete( + self, id: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> HttpResponse[DeleteLlmConnectionResponse]: + """ + Delete an LLM connection by id. Evaluators that depend on the deleted connection are automatically paused. + + Parameters + ---------- + id : str + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + HttpResponse[DeleteLlmConnectionResponse] + """ + _response = self._client_wrapper.httpx_client.request( + f"api/public/llm-connections/{jsonable_encoder(id)}", + method="DELETE", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + DeleteLlmConnectionResponse, + parse_obj_as( + type_=DeleteLlmConnectionResponse, # type: ignore + object_=_response.json(), + ), + ) + return HttpResponse(response=_response, data=_data) + if _response.status_code == 400: + raise Error( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response.text, + ) + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response_json, + ) + class AsyncRawLlmConnectionsClient: def __init__(self, *, client_wrapper: 
AsyncClientWrapper): @@ -539,3 +641,103 @@ async def upsert( headers=dict(_response.headers), body=_response_json, ) + + async def delete( + self, id: str, *, request_options: typing.Optional[RequestOptions] = None + ) -> AsyncHttpResponse[DeleteLlmConnectionResponse]: + """ + Delete an LLM connection by id. Evaluators that depend on the deleted connection are automatically paused. + + Parameters + ---------- + id : str + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + AsyncHttpResponse[DeleteLlmConnectionResponse] + """ + _response = await self._client_wrapper.httpx_client.request( + f"api/public/llm-connections/{jsonable_encoder(id)}", + method="DELETE", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + DeleteLlmConnectionResponse, + parse_obj_as( + type_=DeleteLlmConnectionResponse, # type: ignore + object_=_response.json(), + ), + ) + return AsyncHttpResponse(response=_response, data=_data) + if _response.status_code == 400: + raise Error( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise NotFoundError( + 
headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response.text, + ) + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response_json, + ) diff --git a/langfuse/api/llm_connections/types/__init__.py b/langfuse/api/llm_connections/types/__init__.py index e6ba89200..ab24fc400 100644 --- a/langfuse/api/llm_connections/types/__init__.py +++ b/langfuse/api/llm_connections/types/__init__.py @@ -6,11 +6,13 @@ from importlib import import_module if typing.TYPE_CHECKING: + from .delete_llm_connection_response import DeleteLlmConnectionResponse from .llm_adapter import LlmAdapter from .llm_connection import LlmConnection from .paginated_llm_connections import PaginatedLlmConnections from .upsert_llm_connection_request import UpsertLlmConnectionRequest _dynamic_imports: typing.Dict[str, str] = { + "DeleteLlmConnectionResponse": ".delete_llm_connection_response", "LlmAdapter": ".llm_adapter", "LlmConnection": ".llm_connection", "PaginatedLlmConnections": ".paginated_llm_connections", @@ -46,6 +48,7 @@ def __dir__(): __all__ = [ + "DeleteLlmConnectionResponse", "LlmAdapter", "LlmConnection", "PaginatedLlmConnections", diff --git a/langfuse/api/llm_connections/types/delete_llm_connection_response.py b/langfuse/api/llm_connections/types/delete_llm_connection_response.py new file mode 100644 index 000000000..080a1904c --- /dev/null +++ b/langfuse/api/llm_connections/types/delete_llm_connection_response.py @@ -0,0 +1,14 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +import pydantic +from ...core.pydantic_utilities import UniversalBaseModel + + +class DeleteLlmConnectionResponse(UniversalBaseModel): + message: str + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/observations/client.py b/langfuse/api/observations/client.py index ce0de0cf2..ff7069ede 100644 --- a/langfuse/api/observations/client.py +++ b/langfuse/api/observations/client.py @@ -62,9 +62,10 @@ def get_many( - `io` - input, output - `metadata` - metadata (truncated to 200 chars by default, use `expandMetadata` to get full values) - `model` - providedModelName, internalModelId, modelParameters - - `usage` - usageDetails, costDetails, totalCost + - `usage` - usageDetails, costDetails, totalCost, usagePricingTierName - `prompt` - promptId, promptName, promptVersion - `metrics` - latency, timeToFirstToken + - `trace_context` - tags, release, traceName If not specified, `core` and `basic` field groups are returned. @@ -76,7 +77,7 @@ def get_many( ---------- fields : typing.Optional[str] Comma-separated list of field groups to include in the response. - Available groups: core, basic, time, io, metadata, model, usage, prompt, metrics. + Available groups: core, basic, time, io, metadata, model, usage, prompt, metrics, trace_context. If not specified, `core` and `basic` field groups are returned. Example: "basic,usage,model" @@ -135,12 +136,12 @@ def get_many( "column": string, // Required. Column to filter on (see available columns below) "operator": string, // Required. 
Operator based on type: // - datetime: ">", "<", ">=", "<=" - // - string: "=", "contains", "does not contain", "starts with", "ends with" + // - string: "=", "contains", "does not contain", "starts with", "ends with", "matches" // - stringOptions: "any of", "none of" // - categoryOptions: "any of", "none of" // - arrayOptions: "any of", "none of", "all of" // - number: "=", ">", "<", ">=", "<=" - // - stringObject: "=", "contains", "does not contain", "starts with", "ends with" + // - stringObject: "=", "contains", "does not contain", "starts with", "ends with", "matches" // - numberObject: "=", ">", "<", ">=", "<=" // - boolean: "=", "<>" // - null: "is null", "is not null" @@ -192,8 +193,12 @@ def get_many( - `promptVersion` (number) - Associated prompt version ### Structured Data + - `input` (string) - Observation input. Supports accelerated token search with the `matches` operator. + - `output` (string) - Observation output. Supports accelerated token search with the `matches` operator. - `metadata` (stringObject/numberObject/categoryOptions) - Metadata key-value pairs. Use `key` parameter to filter on specific metadata keys. + The `matches` operator is only supported for `input`, `output`, and stringObject `metadata` filters. It performs token-based full-text search using the events table text indexes. Case sensitivity differs by target: `input` and `output` matches are case-insensitive, while metadata value matches are case-sensitive. Use `contains` for substring semantics. Any v2 `input` or `output` filter must be accompanied by at least one `=` or `matches` filter on `input` or `output`; standalone `contains`, `starts with`, `ends with`, and `does not contain` filters on these columns are rejected. 
+ ## Filter Examples ```json [ @@ -215,6 +220,12 @@ def get_many( "key": "environment", "operator": "=", "value": "production" + }, + { + "type": "string", + "column": "output", + "operator": "matches", + "value": "needle" } ] ``` @@ -314,9 +325,10 @@ async def get_many( - `io` - input, output - `metadata` - metadata (truncated to 200 chars by default, use `expandMetadata` to get full values) - `model` - providedModelName, internalModelId, modelParameters - - `usage` - usageDetails, costDetails, totalCost + - `usage` - usageDetails, costDetails, totalCost, usagePricingTierName - `prompt` - promptId, promptName, promptVersion - `metrics` - latency, timeToFirstToken + - `trace_context` - tags, release, traceName If not specified, `core` and `basic` field groups are returned. @@ -328,7 +340,7 @@ async def get_many( ---------- fields : typing.Optional[str] Comma-separated list of field groups to include in the response. - Available groups: core, basic, time, io, metadata, model, usage, prompt, metrics. + Available groups: core, basic, time, io, metadata, model, usage, prompt, metrics, trace_context. If not specified, `core` and `basic` field groups are returned. Example: "basic,usage,model" @@ -387,12 +399,12 @@ async def get_many( "column": string, // Required. Column to filter on (see available columns below) "operator": string, // Required. 
Operator based on type: // - datetime: ">", "<", ">=", "<=" - // - string: "=", "contains", "does not contain", "starts with", "ends with" + // - string: "=", "contains", "does not contain", "starts with", "ends with", "matches" // - stringOptions: "any of", "none of" // - categoryOptions: "any of", "none of" // - arrayOptions: "any of", "none of", "all of" // - number: "=", ">", "<", ">=", "<=" - // - stringObject: "=", "contains", "does not contain", "starts with", "ends with" + // - stringObject: "=", "contains", "does not contain", "starts with", "ends with", "matches" // - numberObject: "=", ">", "<", ">=", "<=" // - boolean: "=", "<>" // - null: "is null", "is not null" @@ -444,8 +456,12 @@ async def get_many( - `promptVersion` (number) - Associated prompt version ### Structured Data + - `input` (string) - Observation input. Supports accelerated token search with the `matches` operator. + - `output` (string) - Observation output. Supports accelerated token search with the `matches` operator. - `metadata` (stringObject/numberObject/categoryOptions) - Metadata key-value pairs. Use `key` parameter to filter on specific metadata keys. + The `matches` operator is only supported for `input`, `output`, and stringObject `metadata` filters. It performs token-based full-text search using the events table text indexes. Case sensitivity differs by target: `input` and `output` matches are case-insensitive, while metadata value matches are case-sensitive. Use `contains` for substring semantics. Any v2 `input` or `output` filter must be accompanied by at least one `=` or `matches` filter on `input` or `output`; standalone `contains`, `starts with`, `ends with`, and `does not contain` filters on these columns are rejected. 
+ ## Filter Examples ```json [ @@ -467,6 +483,12 @@ async def get_many( "key": "environment", "operator": "=", "value": "production" + }, + { + "type": "string", + "column": "output", + "operator": "matches", + "value": "needle" } ] ``` diff --git a/langfuse/api/observations/raw_client.py b/langfuse/api/observations/raw_client.py index 3ae8eab15..f6502014e 100644 --- a/langfuse/api/observations/raw_client.py +++ b/langfuse/api/observations/raw_client.py @@ -60,9 +60,10 @@ def get_many( - `io` - input, output - `metadata` - metadata (truncated to 200 chars by default, use `expandMetadata` to get full values) - `model` - providedModelName, internalModelId, modelParameters - - `usage` - usageDetails, costDetails, totalCost + - `usage` - usageDetails, costDetails, totalCost, usagePricingTierName - `prompt` - promptId, promptName, promptVersion - `metrics` - latency, timeToFirstToken + - `trace_context` - tags, release, traceName If not specified, `core` and `basic` field groups are returned. @@ -74,7 +75,7 @@ def get_many( ---------- fields : typing.Optional[str] Comma-separated list of field groups to include in the response. - Available groups: core, basic, time, io, metadata, model, usage, prompt, metrics. + Available groups: core, basic, time, io, metadata, model, usage, prompt, metrics, trace_context. If not specified, `core` and `basic` field groups are returned. Example: "basic,usage,model" @@ -133,12 +134,12 @@ def get_many( "column": string, // Required. Column to filter on (see available columns below) "operator": string, // Required. 
Operator based on type: // - datetime: ">", "<", ">=", "<=" - // - string: "=", "contains", "does not contain", "starts with", "ends with" + // - string: "=", "contains", "does not contain", "starts with", "ends with", "matches" // - stringOptions: "any of", "none of" // - categoryOptions: "any of", "none of" // - arrayOptions: "any of", "none of", "all of" // - number: "=", ">", "<", ">=", "<=" - // - stringObject: "=", "contains", "does not contain", "starts with", "ends with" + // - stringObject: "=", "contains", "does not contain", "starts with", "ends with", "matches" // - numberObject: "=", ">", "<", ">=", "<=" // - boolean: "=", "<>" // - null: "is null", "is not null" @@ -190,8 +191,12 @@ def get_many( - `promptVersion` (number) - Associated prompt version ### Structured Data + - `input` (string) - Observation input. Supports accelerated token search with the `matches` operator. + - `output` (string) - Observation output. Supports accelerated token search with the `matches` operator. - `metadata` (stringObject/numberObject/categoryOptions) - Metadata key-value pairs. Use `key` parameter to filter on specific metadata keys. + The `matches` operator is only supported for `input`, `output`, and stringObject `metadata` filters. It performs token-based full-text search using the events table text indexes. Case sensitivity differs by target: `input` and `output` matches are case-insensitive, while metadata value matches are case-sensitive. Use `contains` for substring semantics. Any v2 `input` or `output` filter must be accompanied by at least one `=` or `matches` filter on `input` or `output`; standalone `contains`, `starts with`, `ends with`, and `does not contain` filters on these columns are rejected. 
+ ## Filter Examples ```json [ @@ -213,6 +218,12 @@ def get_many( "key": "environment", "operator": "=", "value": "production" + }, + { + "type": "string", + "column": "output", + "operator": "matches", + "value": "needle" } ] ``` @@ -371,9 +382,10 @@ async def get_many( - `io` - input, output - `metadata` - metadata (truncated to 200 chars by default, use `expandMetadata` to get full values) - `model` - providedModelName, internalModelId, modelParameters - - `usage` - usageDetails, costDetails, totalCost + - `usage` - usageDetails, costDetails, totalCost, usagePricingTierName - `prompt` - promptId, promptName, promptVersion - `metrics` - latency, timeToFirstToken + - `trace_context` - tags, release, traceName If not specified, `core` and `basic` field groups are returned. @@ -385,7 +397,7 @@ async def get_many( ---------- fields : typing.Optional[str] Comma-separated list of field groups to include in the response. - Available groups: core, basic, time, io, metadata, model, usage, prompt, metrics. + Available groups: core, basic, time, io, metadata, model, usage, prompt, metrics, trace_context. If not specified, `core` and `basic` field groups are returned. Example: "basic,usage,model" @@ -444,12 +456,12 @@ async def get_many( "column": string, // Required. Column to filter on (see available columns below) "operator": string, // Required. 
Operator based on type: // - datetime: ">", "<", ">=", "<=" - // - string: "=", "contains", "does not contain", "starts with", "ends with" + // - string: "=", "contains", "does not contain", "starts with", "ends with", "matches" // - stringOptions: "any of", "none of" // - categoryOptions: "any of", "none of" // - arrayOptions: "any of", "none of", "all of" // - number: "=", ">", "<", ">=", "<=" - // - stringObject: "=", "contains", "does not contain", "starts with", "ends with" + // - stringObject: "=", "contains", "does not contain", "starts with", "ends with", "matches" // - numberObject: "=", ">", "<", ">=", "<=" // - boolean: "=", "<>" // - null: "is null", "is not null" @@ -501,8 +513,12 @@ async def get_many( - `promptVersion` (number) - Associated prompt version ### Structured Data + - `input` (string) - Observation input. Supports accelerated token search with the `matches` operator. + - `output` (string) - Observation output. Supports accelerated token search with the `matches` operator. - `metadata` (stringObject/numberObject/categoryOptions) - Metadata key-value pairs. Use `key` parameter to filter on specific metadata keys. + The `matches` operator is only supported for `input`, `output`, and stringObject `metadata` filters. It performs token-based full-text search using the events table text indexes. Case sensitivity differs by target: `input` and `output` matches are case-insensitive, while metadata value matches are case-sensitive. Use `contains` for substring semantics. Any v2 `input` or `output` filter must be accompanied by at least one `=` or `matches` filter on `input` or `output`; standalone `contains`, `starts with`, `ends with`, and `does not contain` filters on these columns are rejected. 
+ ## Filter Examples ```json [ @@ -524,6 +540,12 @@ async def get_many( "key": "environment", "operator": "=", "value": "production" + }, + { + "type": "string", + "column": "output", + "operator": "matches", + "value": "needle" } ] ``` diff --git a/langfuse/api/score_configs/client.py b/langfuse/api/score_configs/client.py index da6626043..b900e3f5d 100644 --- a/langfuse/api/score_configs/client.py +++ b/langfuse/api/score_configs/client.py @@ -46,6 +46,7 @@ def create( Parameters ---------- name : str + Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed. data_type : ScoreConfigDataType @@ -204,7 +205,7 @@ def update( The status of the score config showing if it is archived or not name : typing.Optional[str] - The name of the score config + Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed. categories : typing.Optional[typing.Sequence[ConfigCategory]] Configure custom categories for categorical scores. Pass a list of objects with `label` and `value` properties. Categories are autogenerated for boolean configs and cannot be passed @@ -286,6 +287,7 @@ async def create( Parameters ---------- name : str + Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed. data_type : ScoreConfigDataType @@ -468,7 +470,7 @@ async def update( The status of the score config showing if it is archived or not name : typing.Optional[str] - The name of the score config + Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed. categories : typing.Optional[typing.Sequence[ConfigCategory]] Configure custom categories for categorical scores. Pass a list of objects with `label` and `value` properties. 
Categories are autogenerated for boolean configs and cannot be passed diff --git a/langfuse/api/score_configs/raw_client.py b/langfuse/api/score_configs/raw_client.py index 8021940c6..11de026c6 100644 --- a/langfuse/api/score_configs/raw_client.py +++ b/langfuse/api/score_configs/raw_client.py @@ -45,6 +45,7 @@ def create( Parameters ---------- name : str + Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed. data_type : ScoreConfigDataType @@ -400,7 +401,7 @@ def update( The status of the score config showing if it is archived or not name : typing.Optional[str] - The name of the score config + Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed. categories : typing.Optional[typing.Sequence[ConfigCategory]] Configure custom categories for categorical scores. Pass a list of objects with `label` and `value` properties. Categories are autogenerated for boolean configs and cannot be passed @@ -539,6 +540,7 @@ async def create( Parameters ---------- name : str + Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed. data_type : ScoreConfigDataType @@ -894,7 +896,7 @@ async def update( The status of the score config showing if it is archived or not name : typing.Optional[str] - The name of the score config + Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed. categories : typing.Optional[typing.Sequence[ConfigCategory]] Configure custom categories for categorical scores. Pass a list of objects with `label` and `value` properties. 
Categories are autogenerated for boolean configs and cannot be passed diff --git a/langfuse/api/score_configs/types/create_score_config_request.py b/langfuse/api/score_configs/types/create_score_config_request.py index 1c23fd91e..0edb01407 100644 --- a/langfuse/api/score_configs/types/create_score_config_request.py +++ b/langfuse/api/score_configs/types/create_score_config_request.py @@ -11,7 +11,11 @@ class CreateScoreConfigRequest(UniversalBaseModel): - name: str + name: str = pydantic.Field() + """ + Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed. + """ + data_type: typing_extensions.Annotated[ ScoreConfigDataType, FieldMetadata(alias="dataType") ] diff --git a/langfuse/api/score_configs/types/update_score_config_request.py b/langfuse/api/score_configs/types/update_score_config_request.py index 5237c544f..28c4248e9 100644 --- a/langfuse/api/score_configs/types/update_score_config_request.py +++ b/langfuse/api/score_configs/types/update_score_config_request.py @@ -19,7 +19,7 @@ class UpdateScoreConfigRequest(UniversalBaseModel): name: typing.Optional[str] = pydantic.Field(default=None) """ - The name of the score config + Name of the score config. Max 35 characters. Only letters, numbers, underscores, spaces, periods, parentheses, and hyphens are allowed. """ categories: typing.Optional[typing.List[ConfigCategory]] = pydantic.Field( diff --git a/langfuse/api/scores/client.py b/langfuse/api/scores/client.py index 91db2c416..566530e21 100644 --- a/langfuse/api/scores/client.py +++ b/langfuse/api/scores/client.py @@ -62,7 +62,7 @@ def get_many( Page number, starts at 1. limit : typing.Optional[int] - Limit of items per page. If you encounter api issues due to too large page sizes, try to reduce the limit. + Limit of items per page. Maximum 100. Defaults to 50. Requests with a limit greater than 100 return HTTP 400. 
If you encounter api issues due to too large page sizes, try to reduce the limit. user_id : typing.Optional[str] Retrieve only scores with this userId associated to the trace. @@ -258,7 +258,7 @@ async def get_many( Page number, starts at 1. limit : typing.Optional[int] - Limit of items per page. If you encounter api issues due to too large page sizes, try to reduce the limit. + Limit of items per page. Maximum 100. Defaults to 50. Requests with a limit greater than 100 return HTTP 400. If you encounter api issues due to too large page sizes, try to reduce the limit. user_id : typing.Optional[str] Retrieve only scores with this userId associated to the trace. diff --git a/langfuse/api/scores/raw_client.py b/langfuse/api/scores/raw_client.py index 2dc16e688..d1508545c 100644 --- a/langfuse/api/scores/raw_client.py +++ b/langfuse/api/scores/raw_client.py @@ -61,7 +61,7 @@ def get_many( Page number, starts at 1. limit : typing.Optional[int] - Limit of items per page. If you encounter api issues due to too large page sizes, try to reduce the limit. + Limit of items per page. Maximum 100. Defaults to 50. Requests with a limit greater than 100 return HTTP 400. If you encounter api issues due to too large page sizes, try to reduce the limit. user_id : typing.Optional[str] Retrieve only scores with this userId associated to the trace. @@ -378,7 +378,7 @@ async def get_many( Page number, starts at 1. limit : typing.Optional[int] - Limit of items per page. If you encounter api issues due to too large page sizes, try to reduce the limit. + Limit of items per page. Maximum 100. Defaults to 50. Requests with a limit greater than 100 return HTTP 400. If you encounter api issues due to too large page sizes, try to reduce the limit. user_id : typing.Optional[str] Retrieve only scores with this userId associated to the trace. 
diff --git a/langfuse/api/unstable/__init__.py b/langfuse/api/unstable/__init__.py new file mode 100644 index 000000000..75aafdc24 --- /dev/null +++ b/langfuse/api/unstable/__init__.py @@ -0,0 +1,267 @@ +# This file was auto-generated by Fern from our API Definition. + +# isort: skip_file + +import typing +from importlib import import_module + +if typing.TYPE_CHECKING: + from .errors import ( + AccessDeniedError, + BadRequestError, + ConflictError, + InternalServerError, + MethodNotAllowedError, + NotFoundError, + PublicApiError, + PublicApiErrorCode, + PublicApiErrorDetails, + PublicApiValidationIssue, + TooManyRequestsError, + UnauthorizedError, + UnprocessableContentError, + ) + from . import commons, errors, evaluation_rules, evaluators + from .commons import ( + ArrayOptionsEvaluationRuleFilter, + BooleanEvaluationRuleFilter, + CategoryOptionsEvaluationRuleFilter, + DateTimeEvaluationRuleFilter, + EvaluationRuleArrayOptionsFilterOperator, + EvaluationRuleBooleanFilterOperator, + EvaluationRuleFilter, + EvaluationRuleFilter_ArrayOptions, + EvaluationRuleFilter_Boolean, + EvaluationRuleFilter_CategoryOptions, + EvaluationRuleFilter_Datetime, + EvaluationRuleFilter_Null, + EvaluationRuleFilter_Number, + EvaluationRuleFilter_NumberObject, + EvaluationRuleFilter_String, + EvaluationRuleFilter_StringObject, + EvaluationRuleFilter_StringOptions, + EvaluationRuleMapping, + EvaluationRuleMappingSource, + EvaluationRuleNullFilterOperator, + EvaluationRuleNumberFilterOperator, + EvaluationRuleOptionsFilterOperator, + EvaluationRuleStatus, + EvaluationRuleStringFilterOperator, + EvaluationRuleTarget, + EvaluatorModelConfig, + EvaluatorOutputDataType, + EvaluatorOutputDefinition, + EvaluatorOutputDefinition_Boolean, + EvaluatorOutputDefinition_Categorical, + EvaluatorOutputDefinition_Numeric, + EvaluatorOutputFieldDefinition, + EvaluatorScope, + EvaluatorType, + NullEvaluationRuleFilter, + NumberEvaluationRuleFilter, + NumberObjectEvaluationRuleFilter, + 
PublicBooleanEvaluatorOutputDefinition, + PublicCategoricalEvaluatorOutputDefinition, + PublicCategoricalEvaluatorOutputScoreDefinition, + PublicEvaluatorOutputDefinition, + PublicEvaluatorOutputDefinition_Boolean, + PublicEvaluatorOutputDefinition_Categorical, + PublicEvaluatorOutputDefinition_Numeric, + PublicNumericEvaluatorOutputDefinition, + StringEvaluationRuleFilter, + StringObjectEvaluationRuleFilter, + StringOptionsEvaluationRuleFilter, + ) + from .evaluation_rules import ( + CreateEvaluationRuleRequest, + DeleteEvaluationRuleResponse, + EvaluationRule, + EvaluationRuleEvaluator, + EvaluationRuleEvaluatorReference, + EvaluationRules, + UpdateEvaluationRuleRequest, + ) + from .evaluators import CreateEvaluatorRequest, Evaluator, Evaluators +_dynamic_imports: typing.Dict[str, str] = { + "AccessDeniedError": ".errors", + "ArrayOptionsEvaluationRuleFilter": ".commons", + "BadRequestError": ".errors", + "BooleanEvaluationRuleFilter": ".commons", + "CategoryOptionsEvaluationRuleFilter": ".commons", + "ConflictError": ".errors", + "CreateEvaluationRuleRequest": ".evaluation_rules", + "CreateEvaluatorRequest": ".evaluators", + "DateTimeEvaluationRuleFilter": ".commons", + "DeleteEvaluationRuleResponse": ".evaluation_rules", + "EvaluationRule": ".evaluation_rules", + "EvaluationRuleArrayOptionsFilterOperator": ".commons", + "EvaluationRuleBooleanFilterOperator": ".commons", + "EvaluationRuleEvaluator": ".evaluation_rules", + "EvaluationRuleEvaluatorReference": ".evaluation_rules", + "EvaluationRuleFilter": ".commons", + "EvaluationRuleFilter_ArrayOptions": ".commons", + "EvaluationRuleFilter_Boolean": ".commons", + "EvaluationRuleFilter_CategoryOptions": ".commons", + "EvaluationRuleFilter_Datetime": ".commons", + "EvaluationRuleFilter_Null": ".commons", + "EvaluationRuleFilter_Number": ".commons", + "EvaluationRuleFilter_NumberObject": ".commons", + "EvaluationRuleFilter_String": ".commons", + "EvaluationRuleFilter_StringObject": ".commons", + 
"EvaluationRuleFilter_StringOptions": ".commons", + "EvaluationRuleMapping": ".commons", + "EvaluationRuleMappingSource": ".commons", + "EvaluationRuleNullFilterOperator": ".commons", + "EvaluationRuleNumberFilterOperator": ".commons", + "EvaluationRuleOptionsFilterOperator": ".commons", + "EvaluationRuleStatus": ".commons", + "EvaluationRuleStringFilterOperator": ".commons", + "EvaluationRuleTarget": ".commons", + "EvaluationRules": ".evaluation_rules", + "Evaluator": ".evaluators", + "EvaluatorModelConfig": ".commons", + "EvaluatorOutputDataType": ".commons", + "EvaluatorOutputDefinition": ".commons", + "EvaluatorOutputDefinition_Boolean": ".commons", + "EvaluatorOutputDefinition_Categorical": ".commons", + "EvaluatorOutputDefinition_Numeric": ".commons", + "EvaluatorOutputFieldDefinition": ".commons", + "EvaluatorScope": ".commons", + "EvaluatorType": ".commons", + "Evaluators": ".evaluators", + "InternalServerError": ".errors", + "MethodNotAllowedError": ".errors", + "NotFoundError": ".errors", + "NullEvaluationRuleFilter": ".commons", + "NumberEvaluationRuleFilter": ".commons", + "NumberObjectEvaluationRuleFilter": ".commons", + "PublicApiError": ".errors", + "PublicApiErrorCode": ".errors", + "PublicApiErrorDetails": ".errors", + "PublicApiValidationIssue": ".errors", + "PublicBooleanEvaluatorOutputDefinition": ".commons", + "PublicCategoricalEvaluatorOutputDefinition": ".commons", + "PublicCategoricalEvaluatorOutputScoreDefinition": ".commons", + "PublicEvaluatorOutputDefinition": ".commons", + "PublicEvaluatorOutputDefinition_Boolean": ".commons", + "PublicEvaluatorOutputDefinition_Categorical": ".commons", + "PublicEvaluatorOutputDefinition_Numeric": ".commons", + "PublicNumericEvaluatorOutputDefinition": ".commons", + "StringEvaluationRuleFilter": ".commons", + "StringObjectEvaluationRuleFilter": ".commons", + "StringOptionsEvaluationRuleFilter": ".commons", + "TooManyRequestsError": ".errors", + "UnauthorizedError": ".errors", + 
"UnprocessableContentError": ".errors", + "UpdateEvaluationRuleRequest": ".evaluation_rules", + "commons": ".commons", + "errors": ".errors", + "evaluation_rules": ".evaluation_rules", + "evaluators": ".evaluators", +} + + +def __getattr__(attr_name: str) -> typing.Any: + module_name = _dynamic_imports.get(attr_name) + if module_name is None: + raise AttributeError( + f"No {attr_name} found in _dynamic_imports for module name -> {__name__}" + ) + try: + module = import_module(module_name, __package__) + if module_name == f".{attr_name}": + return module + else: + return getattr(module, attr_name) + except ImportError as e: + raise ImportError( + f"Failed to import {attr_name} from {module_name}: {e}" + ) from e + except AttributeError as e: + raise AttributeError( + f"Failed to get {attr_name} from {module_name}: {e}" + ) from e + + +def __dir__(): + lazy_attrs = list(_dynamic_imports.keys()) + return sorted(lazy_attrs) + + +__all__ = [ + "AccessDeniedError", + "ArrayOptionsEvaluationRuleFilter", + "BadRequestError", + "BooleanEvaluationRuleFilter", + "CategoryOptionsEvaluationRuleFilter", + "ConflictError", + "CreateEvaluationRuleRequest", + "CreateEvaluatorRequest", + "DateTimeEvaluationRuleFilter", + "DeleteEvaluationRuleResponse", + "EvaluationRule", + "EvaluationRuleArrayOptionsFilterOperator", + "EvaluationRuleBooleanFilterOperator", + "EvaluationRuleEvaluator", + "EvaluationRuleEvaluatorReference", + "EvaluationRuleFilter", + "EvaluationRuleFilter_ArrayOptions", + "EvaluationRuleFilter_Boolean", + "EvaluationRuleFilter_CategoryOptions", + "EvaluationRuleFilter_Datetime", + "EvaluationRuleFilter_Null", + "EvaluationRuleFilter_Number", + "EvaluationRuleFilter_NumberObject", + "EvaluationRuleFilter_String", + "EvaluationRuleFilter_StringObject", + "EvaluationRuleFilter_StringOptions", + "EvaluationRuleMapping", + "EvaluationRuleMappingSource", + "EvaluationRuleNullFilterOperator", + "EvaluationRuleNumberFilterOperator", + "EvaluationRuleOptionsFilterOperator", 
+ "EvaluationRuleStatus", + "EvaluationRuleStringFilterOperator", + "EvaluationRuleTarget", + "EvaluationRules", + "Evaluator", + "EvaluatorModelConfig", + "EvaluatorOutputDataType", + "EvaluatorOutputDefinition", + "EvaluatorOutputDefinition_Boolean", + "EvaluatorOutputDefinition_Categorical", + "EvaluatorOutputDefinition_Numeric", + "EvaluatorOutputFieldDefinition", + "EvaluatorScope", + "EvaluatorType", + "Evaluators", + "InternalServerError", + "MethodNotAllowedError", + "NotFoundError", + "NullEvaluationRuleFilter", + "NumberEvaluationRuleFilter", + "NumberObjectEvaluationRuleFilter", + "PublicApiError", + "PublicApiErrorCode", + "PublicApiErrorDetails", + "PublicApiValidationIssue", + "PublicBooleanEvaluatorOutputDefinition", + "PublicCategoricalEvaluatorOutputDefinition", + "PublicCategoricalEvaluatorOutputScoreDefinition", + "PublicEvaluatorOutputDefinition", + "PublicEvaluatorOutputDefinition_Boolean", + "PublicEvaluatorOutputDefinition_Categorical", + "PublicEvaluatorOutputDefinition_Numeric", + "PublicNumericEvaluatorOutputDefinition", + "StringEvaluationRuleFilter", + "StringObjectEvaluationRuleFilter", + "StringOptionsEvaluationRuleFilter", + "TooManyRequestsError", + "UnauthorizedError", + "UnprocessableContentError", + "UpdateEvaluationRuleRequest", + "commons", + "errors", + "evaluation_rules", + "evaluators", +] diff --git a/langfuse/api/unstable/client.py b/langfuse/api/unstable/client.py new file mode 100644 index 000000000..5c3ac32d7 --- /dev/null +++ b/langfuse/api/unstable/client.py @@ -0,0 +1,91 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +from __future__ import annotations + +import typing + +from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper +from .raw_client import AsyncRawUnstableClient, RawUnstableClient + +if typing.TYPE_CHECKING: + from .evaluation_rules.client import ( + AsyncEvaluationRulesClient, + EvaluationRulesClient, + ) + from .evaluators.client import AsyncEvaluatorsClient, EvaluatorsClient + + +class UnstableClient: + def __init__(self, *, client_wrapper: SyncClientWrapper): + self._raw_client = RawUnstableClient(client_wrapper=client_wrapper) + self._client_wrapper = client_wrapper + self._evaluation_rules: typing.Optional[EvaluationRulesClient] = None + self._evaluators: typing.Optional[EvaluatorsClient] = None + + @property + def with_raw_response(self) -> RawUnstableClient: + """ + Retrieves a raw implementation of this client that returns raw responses. + + Returns + ------- + RawUnstableClient + """ + return self._raw_client + + @property + def evaluation_rules(self): + if self._evaluation_rules is None: + from .evaluation_rules.client import EvaluationRulesClient # noqa: E402 + + self._evaluation_rules = EvaluationRulesClient( + client_wrapper=self._client_wrapper + ) + return self._evaluation_rules + + @property + def evaluators(self): + if self._evaluators is None: + from .evaluators.client import EvaluatorsClient # noqa: E402 + + self._evaluators = EvaluatorsClient(client_wrapper=self._client_wrapper) + return self._evaluators + + +class AsyncUnstableClient: + def __init__(self, *, client_wrapper: AsyncClientWrapper): + self._raw_client = AsyncRawUnstableClient(client_wrapper=client_wrapper) + self._client_wrapper = client_wrapper + self._evaluation_rules: typing.Optional[AsyncEvaluationRulesClient] = None + self._evaluators: typing.Optional[AsyncEvaluatorsClient] = None + + @property + def with_raw_response(self) -> AsyncRawUnstableClient: + """ + Retrieves a raw implementation of this client that returns raw responses. 
+ + Returns + ------- + AsyncRawUnstableClient + """ + return self._raw_client + + @property + def evaluation_rules(self): + if self._evaluation_rules is None: + from .evaluation_rules.client import AsyncEvaluationRulesClient # noqa: E402 + + self._evaluation_rules = AsyncEvaluationRulesClient( + client_wrapper=self._client_wrapper + ) + return self._evaluation_rules + + @property + def evaluators(self): + if self._evaluators is None: + from .evaluators.client import AsyncEvaluatorsClient # noqa: E402 + + self._evaluators = AsyncEvaluatorsClient( + client_wrapper=self._client_wrapper + ) + return self._evaluators diff --git a/langfuse/api/unstable/commons/__init__.py b/langfuse/api/unstable/commons/__init__.py new file mode 100644 index 000000000..13d9571ff --- /dev/null +++ b/langfuse/api/unstable/commons/__init__.py @@ -0,0 +1,187 @@ +# This file was auto-generated by Fern from our API Definition. + +# isort: skip_file + +import typing +from importlib import import_module + +if typing.TYPE_CHECKING: + from .types import ( + ArrayOptionsEvaluationRuleFilter, + BooleanEvaluationRuleFilter, + CategoryOptionsEvaluationRuleFilter, + DateTimeEvaluationRuleFilter, + EvaluationRuleArrayOptionsFilterOperator, + EvaluationRuleBooleanFilterOperator, + EvaluationRuleFilter, + EvaluationRuleFilter_ArrayOptions, + EvaluationRuleFilter_Boolean, + EvaluationRuleFilter_CategoryOptions, + EvaluationRuleFilter_Datetime, + EvaluationRuleFilter_Null, + EvaluationRuleFilter_Number, + EvaluationRuleFilter_NumberObject, + EvaluationRuleFilter_String, + EvaluationRuleFilter_StringObject, + EvaluationRuleFilter_StringOptions, + EvaluationRuleMapping, + EvaluationRuleMappingSource, + EvaluationRuleNullFilterOperator, + EvaluationRuleNumberFilterOperator, + EvaluationRuleOptionsFilterOperator, + EvaluationRuleStatus, + EvaluationRuleStringFilterOperator, + EvaluationRuleTarget, + EvaluatorModelConfig, + EvaluatorOutputDataType, + EvaluatorOutputDefinition, + 
# Every public name of this package is re-exported lazily from the `.types`
# subpackage; the table maps export name -> relative module to import.
_dynamic_imports: typing.Dict[str, str] = dict.fromkeys(
    (
        "ArrayOptionsEvaluationRuleFilter",
        "BooleanEvaluationRuleFilter",
        "CategoryOptionsEvaluationRuleFilter",
        "DateTimeEvaluationRuleFilter",
        "EvaluationRuleArrayOptionsFilterOperator",
        "EvaluationRuleBooleanFilterOperator",
        "EvaluationRuleFilter",
        "EvaluationRuleFilter_ArrayOptions",
        "EvaluationRuleFilter_Boolean",
        "EvaluationRuleFilter_CategoryOptions",
        "EvaluationRuleFilter_Datetime",
        "EvaluationRuleFilter_Null",
        "EvaluationRuleFilter_Number",
        "EvaluationRuleFilter_NumberObject",
        "EvaluationRuleFilter_String",
        "EvaluationRuleFilter_StringObject",
        "EvaluationRuleFilter_StringOptions",
        "EvaluationRuleMapping",
        "EvaluationRuleMappingSource",
        "EvaluationRuleNullFilterOperator",
        "EvaluationRuleNumberFilterOperator",
        "EvaluationRuleOptionsFilterOperator",
        "EvaluationRuleStatus",
        "EvaluationRuleStringFilterOperator",
        "EvaluationRuleTarget",
        "EvaluatorModelConfig",
        "EvaluatorOutputDataType",
        "EvaluatorOutputDefinition",
        "EvaluatorOutputDefinition_Boolean",
        "EvaluatorOutputDefinition_Categorical",
        "EvaluatorOutputDefinition_Numeric",
        "EvaluatorOutputFieldDefinition",
        "EvaluatorScope",
        "EvaluatorType",
        "NullEvaluationRuleFilter",
        "NumberEvaluationRuleFilter",
        "NumberObjectEvaluationRuleFilter",
        "PublicBooleanEvaluatorOutputDefinition",
        "PublicCategoricalEvaluatorOutputDefinition",
        "PublicCategoricalEvaluatorOutputScoreDefinition",
        "PublicEvaluatorOutputDefinition",
        "PublicEvaluatorOutputDefinition_Boolean",
        "PublicEvaluatorOutputDefinition_Categorical",
        "PublicEvaluatorOutputDefinition_Numeric",
        "PublicNumericEvaluatorOutputDefinition",
        "StringEvaluationRuleFilter",
        "StringObjectEvaluationRuleFilter",
        "StringOptionsEvaluationRuleFilter",
    ),
    ".types",
)


def __getattr__(attr_name: str) -> typing.Any:
    """Resolve a lazily exported name (module-level ``__getattr__`` hook, PEP 562).

    Looks ``attr_name`` up in ``_dynamic_imports``, imports the mapped module
    relative to this package and returns the attribute of that name from it.

    Raises
    ------
    AttributeError
        If the name is not in ``_dynamic_imports`` or the imported module does
        not define it.
    ImportError
        If the mapped module cannot be imported.
    """
    module_name = _dynamic_imports.get(attr_name)
    if module_name is None:
        raise AttributeError(
            f"No {attr_name} found in _dynamic_imports for module name -> {__name__}"
        )
    try:
        loaded = import_module(module_name, __package__)
        # When the export is named after its own submodule, hand back the module.
        return loaded if module_name == f".{attr_name}" else getattr(loaded, attr_name)
    except ImportError as e:
        raise ImportError(
            f"Failed to import {attr_name} from {module_name}: {e}"
        ) from e
    except AttributeError as e:
        raise AttributeError(
            f"Failed to get {attr_name} from {module_name}: {e}"
        ) from e


def __dir__():
    """Return the lazily exported names in sorted order for ``dir()``."""
    return sorted(_dynamic_imports)
"EvaluationRuleFilter", + "EvaluationRuleFilter_ArrayOptions", + "EvaluationRuleFilter_Boolean", + "EvaluationRuleFilter_CategoryOptions", + "EvaluationRuleFilter_Datetime", + "EvaluationRuleFilter_Null", + "EvaluationRuleFilter_Number", + "EvaluationRuleFilter_NumberObject", + "EvaluationRuleFilter_String", + "EvaluationRuleFilter_StringObject", + "EvaluationRuleFilter_StringOptions", + "EvaluationRuleMapping", + "EvaluationRuleMappingSource", + "EvaluationRuleNullFilterOperator", + "EvaluationRuleNumberFilterOperator", + "EvaluationRuleOptionsFilterOperator", + "EvaluationRuleStatus", + "EvaluationRuleStringFilterOperator", + "EvaluationRuleTarget", + "EvaluatorModelConfig", + "EvaluatorOutputDataType", + "EvaluatorOutputDefinition", + "EvaluatorOutputDefinition_Boolean", + "EvaluatorOutputDefinition_Categorical", + "EvaluatorOutputDefinition_Numeric", + "EvaluatorOutputFieldDefinition", + "EvaluatorScope", + "EvaluatorType", + "NullEvaluationRuleFilter", + "NumberEvaluationRuleFilter", + "NumberObjectEvaluationRuleFilter", + "PublicBooleanEvaluatorOutputDefinition", + "PublicCategoricalEvaluatorOutputDefinition", + "PublicCategoricalEvaluatorOutputScoreDefinition", + "PublicEvaluatorOutputDefinition", + "PublicEvaluatorOutputDefinition_Boolean", + "PublicEvaluatorOutputDefinition_Categorical", + "PublicEvaluatorOutputDefinition_Numeric", + "PublicNumericEvaluatorOutputDefinition", + "StringEvaluationRuleFilter", + "StringObjectEvaluationRuleFilter", + "StringOptionsEvaluationRuleFilter", +] diff --git a/langfuse/api/unstable/commons/types/__init__.py b/langfuse/api/unstable/commons/types/__init__.py new file mode 100644 index 000000000..a0e7d9f9d --- /dev/null +++ b/langfuse/api/unstable/commons/types/__init__.py @@ -0,0 +1,211 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +# isort: skip_file + +import typing +from importlib import import_module + +if typing.TYPE_CHECKING: + from .array_options_evaluation_rule_filter import ArrayOptionsEvaluationRuleFilter + from .boolean_evaluation_rule_filter import BooleanEvaluationRuleFilter + from .category_options_evaluation_rule_filter import ( + CategoryOptionsEvaluationRuleFilter, + ) + from .date_time_evaluation_rule_filter import DateTimeEvaluationRuleFilter + from .evaluation_rule_array_options_filter_operator import ( + EvaluationRuleArrayOptionsFilterOperator, + ) + from .evaluation_rule_boolean_filter_operator import ( + EvaluationRuleBooleanFilterOperator, + ) + from .evaluation_rule_filter import ( + EvaluationRuleFilter, + EvaluationRuleFilter_ArrayOptions, + EvaluationRuleFilter_Boolean, + EvaluationRuleFilter_CategoryOptions, + EvaluationRuleFilter_Datetime, + EvaluationRuleFilter_Null, + EvaluationRuleFilter_Number, + EvaluationRuleFilter_NumberObject, + EvaluationRuleFilter_String, + EvaluationRuleFilter_StringObject, + EvaluationRuleFilter_StringOptions, + ) + from .evaluation_rule_mapping import EvaluationRuleMapping + from .evaluation_rule_mapping_source import EvaluationRuleMappingSource + from .evaluation_rule_null_filter_operator import EvaluationRuleNullFilterOperator + from .evaluation_rule_number_filter_operator import ( + EvaluationRuleNumberFilterOperator, + ) + from .evaluation_rule_options_filter_operator import ( + EvaluationRuleOptionsFilterOperator, + ) + from .evaluation_rule_status import EvaluationRuleStatus + from .evaluation_rule_string_filter_operator import ( + EvaluationRuleStringFilterOperator, + ) + from .evaluation_rule_target import EvaluationRuleTarget + from .evaluator_model_config import EvaluatorModelConfig + from .evaluator_output_data_type import EvaluatorOutputDataType + from .evaluator_output_definition import ( + EvaluatorOutputDefinition, + EvaluatorOutputDefinition_Boolean, + EvaluatorOutputDefinition_Categorical, + 
# Export name -> relative module that defines it. Union variants
# (`Foo_Bar`) live in the same module as their union type.
_dynamic_imports: typing.Dict[str, str] = {
    "ArrayOptionsEvaluationRuleFilter": ".array_options_evaluation_rule_filter",
    "BooleanEvaluationRuleFilter": ".boolean_evaluation_rule_filter",
    "CategoryOptionsEvaluationRuleFilter": ".category_options_evaluation_rule_filter",
    "DateTimeEvaluationRuleFilter": ".date_time_evaluation_rule_filter",
    "EvaluationRuleArrayOptionsFilterOperator": ".evaluation_rule_array_options_filter_operator",
    "EvaluationRuleBooleanFilterOperator": ".evaluation_rule_boolean_filter_operator",
    "EvaluationRuleFilter": ".evaluation_rule_filter",
    "EvaluationRuleFilter_ArrayOptions": ".evaluation_rule_filter",
    "EvaluationRuleFilter_Boolean": ".evaluation_rule_filter",
    "EvaluationRuleFilter_CategoryOptions": ".evaluation_rule_filter",
    "EvaluationRuleFilter_Datetime": ".evaluation_rule_filter",
    "EvaluationRuleFilter_Null": ".evaluation_rule_filter",
    "EvaluationRuleFilter_Number": ".evaluation_rule_filter",
    "EvaluationRuleFilter_NumberObject": ".evaluation_rule_filter",
    "EvaluationRuleFilter_String": ".evaluation_rule_filter",
    "EvaluationRuleFilter_StringObject": ".evaluation_rule_filter",
    "EvaluationRuleFilter_StringOptions": ".evaluation_rule_filter",
    "EvaluationRuleMapping": ".evaluation_rule_mapping",
    "EvaluationRuleMappingSource": ".evaluation_rule_mapping_source",
    "EvaluationRuleNullFilterOperator": ".evaluation_rule_null_filter_operator",
    "EvaluationRuleNumberFilterOperator": ".evaluation_rule_number_filter_operator",
    "EvaluationRuleOptionsFilterOperator": ".evaluation_rule_options_filter_operator",
    "EvaluationRuleStatus": ".evaluation_rule_status",
    "EvaluationRuleStringFilterOperator": ".evaluation_rule_string_filter_operator",
    "EvaluationRuleTarget": ".evaluation_rule_target",
    "EvaluatorModelConfig": ".evaluator_model_config",
    "EvaluatorOutputDataType": ".evaluator_output_data_type",
    "EvaluatorOutputDefinition": ".evaluator_output_definition",
    "EvaluatorOutputDefinition_Boolean": ".evaluator_output_definition",
    "EvaluatorOutputDefinition_Categorical": ".evaluator_output_definition",
    "EvaluatorOutputDefinition_Numeric": ".evaluator_output_definition",
    "EvaluatorOutputFieldDefinition": ".evaluator_output_field_definition",
    "EvaluatorScope": ".evaluator_scope",
    "EvaluatorType": ".evaluator_type",
    "NullEvaluationRuleFilter": ".null_evaluation_rule_filter",
    "NumberEvaluationRuleFilter": ".number_evaluation_rule_filter",
    "NumberObjectEvaluationRuleFilter": ".number_object_evaluation_rule_filter",
    "PublicBooleanEvaluatorOutputDefinition": ".public_boolean_evaluator_output_definition",
    "PublicCategoricalEvaluatorOutputDefinition": ".public_categorical_evaluator_output_definition",
    "PublicCategoricalEvaluatorOutputScoreDefinition": ".public_categorical_evaluator_output_score_definition",
    "PublicEvaluatorOutputDefinition": ".public_evaluator_output_definition",
    "PublicEvaluatorOutputDefinition_Boolean": ".public_evaluator_output_definition",
    "PublicEvaluatorOutputDefinition_Categorical": ".public_evaluator_output_definition",
    "PublicEvaluatorOutputDefinition_Numeric": ".public_evaluator_output_definition",
    "PublicNumericEvaluatorOutputDefinition": ".public_numeric_evaluator_output_definition",
    "StringEvaluationRuleFilter": ".string_evaluation_rule_filter",
    "StringObjectEvaluationRuleFilter": ".string_object_evaluation_rule_filter",
    "StringOptionsEvaluationRuleFilter": ".string_options_evaluation_rule_filter",
}


def __getattr__(attr_name: str) -> typing.Any:
    """Resolve a lazily exported name (module-level ``__getattr__`` hook, PEP 562).

    Looks ``attr_name`` up in ``_dynamic_imports``, imports the mapped module
    relative to this package and returns the attribute of that name from it.

    Raises
    ------
    AttributeError
        If the name is not in ``_dynamic_imports`` or the imported module does
        not define it.
    ImportError
        If the mapped module cannot be imported.
    """
    module_name = _dynamic_imports.get(attr_name)
    if module_name is None:
        raise AttributeError(
            f"No {attr_name} found in _dynamic_imports for module name -> {__name__}"
        )
    try:
        loaded = import_module(module_name, __package__)
        # When the export is named after its own submodule, hand back the module.
        return loaded if module_name == f".{attr_name}" else getattr(loaded, attr_name)
    except ImportError as e:
        raise ImportError(
            f"Failed to import {attr_name} from {module_name}: {e}"
        ) from e
    except AttributeError as e:
        raise AttributeError(
            f"Failed to get {attr_name} from {module_name}: {e}"
        ) from e


def __dir__():
    """Return the lazily exported names in sorted order for ``dir()``."""
    return sorted(_dynamic_imports)
+ "EvaluationRuleFilter_Null", + "EvaluationRuleFilter_Number", + "EvaluationRuleFilter_NumberObject", + "EvaluationRuleFilter_String", + "EvaluationRuleFilter_StringObject", + "EvaluationRuleFilter_StringOptions", + "EvaluationRuleMapping", + "EvaluationRuleMappingSource", + "EvaluationRuleNullFilterOperator", + "EvaluationRuleNumberFilterOperator", + "EvaluationRuleOptionsFilterOperator", + "EvaluationRuleStatus", + "EvaluationRuleStringFilterOperator", + "EvaluationRuleTarget", + "EvaluatorModelConfig", + "EvaluatorOutputDataType", + "EvaluatorOutputDefinition", + "EvaluatorOutputDefinition_Boolean", + "EvaluatorOutputDefinition_Categorical", + "EvaluatorOutputDefinition_Numeric", + "EvaluatorOutputFieldDefinition", + "EvaluatorScope", + "EvaluatorType", + "NullEvaluationRuleFilter", + "NumberEvaluationRuleFilter", + "NumberObjectEvaluationRuleFilter", + "PublicBooleanEvaluatorOutputDefinition", + "PublicCategoricalEvaluatorOutputDefinition", + "PublicCategoricalEvaluatorOutputScoreDefinition", + "PublicEvaluatorOutputDefinition", + "PublicEvaluatorOutputDefinition_Boolean", + "PublicEvaluatorOutputDefinition_Categorical", + "PublicEvaluatorOutputDefinition_Numeric", + "PublicNumericEvaluatorOutputDefinition", + "StringEvaluationRuleFilter", + "StringObjectEvaluationRuleFilter", + "StringOptionsEvaluationRuleFilter", +] diff --git a/langfuse/api/unstable/commons/types/array_options_evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/array_options_evaluation_rule_filter.py new file mode 100644 index 000000000..c89ce8b16 --- /dev/null +++ b/langfuse/api/unstable/commons/types/array_options_evaluation_rule_filter.py @@ -0,0 +1,26 @@ +# This file was auto-generated by Fern from our API Definition. 
# --- array_options_evaluation_rule_filter.py ---
class ArrayOptionsEvaluationRuleFilter(UniversalBaseModel):
    """Evaluation-rule filter that matches a column against a list of array elements.

    The comparison mode is given by ``operator``
    (``EvaluationRuleArrayOptionsFilterOperator``).
    """

    column: str = pydantic.Field()
    """
    Column to filter on.
    """

    operator: EvaluationRuleArrayOptionsFilterOperator
    value: typing.List[str] = pydantic.Field()
    """
    One or more array elements to match.
    """

    # extra="allow": fields not declared above are kept instead of rejected.
    # frozen=True: instances are immutable after construction.
    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
        extra="allow", frozen=True
    )


# --- boolean_evaluation_rule_filter.py ---
class BooleanEvaluationRuleFilter(UniversalBaseModel):
    """Evaluation-rule filter that compares a column with a boolean ``value``.

    The comparison is given by ``operator``
    (``EvaluationRuleBooleanFilterOperator``).
    """

    column: str = pydantic.Field()
    """
    Column to filter on.
    """

    operator: EvaluationRuleBooleanFilterOperator
    value: bool

    # extra="allow": fields not declared above are kept instead of rejected.
    # frozen=True: instances are immutable after construction.
    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
        extra="allow", frozen=True
    )
# --- category_options_evaluation_rule_filter.py ---
class CategoryOptionsEvaluationRuleFilter(UniversalBaseModel):
    """Evaluation-rule filter on one ``key`` inside an object-valued column.

    ``value`` is the list of strings the ``operator``
    (``EvaluationRuleOptionsFilterOperator``) is applied to.
    """

    column: str = pydantic.Field()
    """
    Object-valued column to filter on.
    """

    key: str = pydantic.Field()
    """
    Key inside the object-valued column to filter on.
    """

    operator: EvaluationRuleOptionsFilterOperator
    value: typing.List[str]

    # extra="allow": fields not declared above are kept instead of rejected.
    # frozen=True: instances are immutable after construction.
    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
        extra="allow", frozen=True
    )


# --- date_time_evaluation_rule_filter.py ---
class DateTimeEvaluationRuleFilter(UniversalBaseModel):
    """Evaluation-rule filter that compares a column with a datetime ``value``.

    Datetime comparisons reuse ``EvaluationRuleNumberFilterOperator`` as the
    operator type.
    """

    column: str = pydantic.Field()
    """
    Column to filter on.
    """

    operator: EvaluationRuleNumberFilterOperator = pydantic.Field()
    """
    Comparison operator for datetime values.
    """

    value: dt.datetime = pydantic.Field()
    """
    Datetime value to compare against.
    """

    # extra="allow": fields not declared above are kept instead of rejected.
    # frozen=True: instances are immutable after construction.
    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
        extra="allow", frozen=True
    )


# --- evaluation_rule_array_options_filter_operator.py ---
T_Result = typing.TypeVar("T_Result")


class EvaluationRuleArrayOptionsFilterOperator(enum.StrEnum):
    """Operators available to array-options evaluation-rule filters."""

    ANY_OF = "any of"
    NONE_OF = "none of"
    ALL_OF = "all of"

    def visit(
        self,
        any_of: typing.Callable[[], T_Result],
        none_of: typing.Callable[[], T_Result],
        all_of: typing.Callable[[], T_Result],
    ) -> T_Result:
        """Call the zero-argument callback matching this member and return its result.

        Parameters
        ----------
        any_of, none_of, all_of
            One callback per enum member; only the one for ``self`` is invoked.
        """
        dispatch = (
            (EvaluationRuleArrayOptionsFilterOperator.ANY_OF, any_of),
            (EvaluationRuleArrayOptionsFilterOperator.NONE_OF, none_of),
            (EvaluationRuleArrayOptionsFilterOperator.ALL_OF, all_of),
        )
        # Identity comparison, checked in declaration order.
        for member, callback in dispatch:
            if self is member:
                return callback()
+ +import typing + +from ....core import enum + +T_Result = typing.TypeVar("T_Result") + + +class EvaluationRuleBooleanFilterOperator(enum.StrEnum): + EQUALS = "=" + NOT_EQUALS = "<>" + + def visit( + self, + equals: typing.Callable[[], T_Result], + not_equals: typing.Callable[[], T_Result], + ) -> T_Result: + if self is EvaluationRuleBooleanFilterOperator.EQUALS: + return equals() + if self is EvaluationRuleBooleanFilterOperator.NOT_EQUALS: + return not_equals() diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/evaluation_rule_filter.py new file mode 100644 index 000000000..ea5e0420b --- /dev/null +++ b/langfuse/api/unstable/commons/types/evaluation_rule_filter.py @@ -0,0 +1,740 @@ +# This file was auto-generated by Fern from our API Definition. + +from __future__ import annotations + +import datetime as dt +import typing + +import pydantic +import typing_extensions +from ....core.pydantic_utilities import UniversalBaseModel +from .evaluation_rule_array_options_filter_operator import ( + EvaluationRuleArrayOptionsFilterOperator, +) +from .evaluation_rule_boolean_filter_operator import EvaluationRuleBooleanFilterOperator +from .evaluation_rule_null_filter_operator import EvaluationRuleNullFilterOperator +from .evaluation_rule_number_filter_operator import EvaluationRuleNumberFilterOperator +from .evaluation_rule_options_filter_operator import EvaluationRuleOptionsFilterOperator +from .evaluation_rule_string_filter_operator import EvaluationRuleStringFilterOperator + + +class EvaluationRuleFilter_Datetime(UniversalBaseModel): + """ + One filter condition used to decide whether a live-ingested target should be evaluated. + + An evaluation rule can include zero or more filter objects. All filters must be satisfied for the target to run. + + How to build a valid filter object: + - Pick the `target` first, because it changes the supported columns. + - Pick the filter `type`. 
That determines which fields are required. + - Use `key` only for object filters such as `metadata`. + - Use the correct `value` shape for the chosen filter `type`. + + Operator quick reference by filter `type`: + - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with` + - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `stringOptions`: `any of`, `none of` + - `arrayOptions`: `any of`, `none of`, `all of` + - `stringObject`: same operators as `string` + - `null`: `is null`, `is not null` + + Supported columns by target: + - `target=observation` + - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT` + - `name`: `stringOptions`, operators `any of` / `none of` + - `environment`: `stringOptions`, operators `any of` / `none of` + - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR` + - `version`: `string` + - `traceName`: `stringOptions`, operators `any of` / `none of` + - `userId`: `string` + - `sessionId`: `string` + - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `metadata`: `stringObject` with `key` + - `parentObservationId`: `null`, operators `is null` / `is not null` + - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `toolCalls`: `number` + - `target=experiment` + - `datasetId`: `stringOptions`, operators `any of` / `none of` + Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names. + + Recovery guidance: + - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target` + - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided. 
+ - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response. + + Examples + -------- + from langfuse.api.unstable.commons import ( + EvaluationRuleFilter_StringOptions, + EvaluationRuleOptionsFilterOperator, + ) + + EvaluationRuleFilter_StringOptions( + column="type", + operator=EvaluationRuleOptionsFilterOperator.ANY_OF, + value=["GENERATION"], + ) + """ + + type: typing.Literal["datetime"] = "datetime" + column: str + operator: EvaluationRuleNumberFilterOperator + value: dt.datetime + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + +class EvaluationRuleFilter_String(UniversalBaseModel): + """ + One filter condition used to decide whether a live-ingested target should be evaluated. + + An evaluation rule can include zero or more filter objects. All filters must be satisfied for the target to run. + + How to build a valid filter object: + - Pick the `target` first, because it changes the supported columns. + - Pick the filter `type`. That determines which fields are required. + - Use `key` only for object filters such as `metadata`. + - Use the correct `value` shape for the chosen filter `type`.
+ + Operator quick reference by filter `type`: + - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with` + - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `stringOptions`: `any of`, `none of` + - `arrayOptions`: `any of`, `none of`, `all of` + - `stringObject`: same operators as `string` + - `null`: `is null`, `is not null` + + Supported columns by target: + - `target=observation` + - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT` + - `name`: `stringOptions`, operators `any of` / `none of` + - `environment`: `stringOptions`, operators `any of` / `none of` + - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR` + - `version`: `string` + - `traceName`: `stringOptions`, operators `any of` / `none of` + - `userId`: `string` + - `sessionId`: `string` + - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `metadata`: `stringObject` with `key` + - `parentObservationId`: `null`, operators `is null` / `is not null` + - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `toolCalls`: `number` + - `target=experiment` + - `datasetId`: `stringOptions`, operators `any of` / `none of` + Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names. + + Recovery guidance: + - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target` + - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided. + - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response. 
+ + Examples + -------- + from langfuse.api.unstable.commons import ( + EvaluationRuleFilter_StringOptions, + EvaluationRuleOptionsFilterOperator, + ) + + EvaluationRuleFilter_StringOptions( + column="type", + operator=EvaluationRuleOptionsFilterOperator.ANY_OF, + value=["GENERATION"], + ) + """ + + type: typing.Literal["string"] = "string" + column: str + operator: EvaluationRuleStringFilterOperator + value: str + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + +class EvaluationRuleFilter_Number(UniversalBaseModel): + """ + One filter condition used to decide whether a live-ingested target should be evaluated. + + An evaluation rule can include zero or more filter objects. All filters must be satisfied for the target to run. + + How to build a valid filter object: + - Pick the `target` first, because it changes the supported columns. + - Pick the filter `type`. That determines which fields are required. + - Use `key` only for object filters such as `metadata`. + - Use the correct `value` shape for the chosen filter `type`.
+ + Operator quick reference by filter `type`: + - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with` + - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `stringOptions`: `any of`, `none of` + - `arrayOptions`: `any of`, `none of`, `all of` + - `stringObject`: same operators as `string` + - `null`: `is null`, `is not null` + + Supported columns by target: + - `target=observation` + - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT` + - `name`: `stringOptions`, operators `any of` / `none of` + - `environment`: `stringOptions`, operators `any of` / `none of` + - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR` + - `version`: `string` + - `traceName`: `stringOptions`, operators `any of` / `none of` + - `userId`: `string` + - `sessionId`: `string` + - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `metadata`: `stringObject` with `key` + - `parentObservationId`: `null`, operators `is null` / `is not null` + - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `toolCalls`: `number` + - `target=experiment` + - `datasetId`: `stringOptions`, operators `any of` / `none of` + Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names. + + Recovery guidance: + - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target` + - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided. + - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response. 
+ + Examples + -------- + from langfuse.api.unstable.commons import ( + EvaluationRuleFilter_StringOptions, + EvaluationRuleOptionsFilterOperator, + ) + + EvaluationRuleFilter_StringOptions( + column="type", + operator=EvaluationRuleOptionsFilterOperator.ANY_OF, + value=["GENERATION"], + ) + """ + + type: typing.Literal["number"] = "number" + column: str + operator: EvaluationRuleNumberFilterOperator + value: float + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + +class EvaluationRuleFilter_StringOptions(UniversalBaseModel): + """ + One filter condition used to decide whether a live-ingested target should be evaluated. + + An evaluation rule can include zero or more filter objects. All filters must be satisfied for the target to run. + + How to build a valid filter object: + - Pick the `target` first, because it changes the supported columns. + - Pick the filter `type`. That determines which fields are required. + - Use `key` only for object filters such as `metadata`. + - Use the correct `value` shape for the chosen filter `type`.
+ + Operator quick reference by filter `type`: + - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with` + - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `stringOptions`: `any of`, `none of` + - `arrayOptions`: `any of`, `none of`, `all of` + - `stringObject`: same operators as `string` + - `null`: `is null`, `is not null` + + Supported columns by target: + - `target=observation` + - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT` + - `name`: `stringOptions`, operators `any of` / `none of` + - `environment`: `stringOptions`, operators `any of` / `none of` + - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR` + - `version`: `string` + - `traceName`: `stringOptions`, operators `any of` / `none of` + - `userId`: `string` + - `sessionId`: `string` + - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `metadata`: `stringObject` with `key` + - `parentObservationId`: `null`, operators `is null` / `is not null` + - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `toolCalls`: `number` + - `target=experiment` + - `datasetId`: `stringOptions`, operators `any of` / `none of` + Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names. + + Recovery guidance: + - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target` + - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided. + - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response. 
+ + Examples + -------- + from langfuse.unstable.commons import ( + EvaluationRuleFilter_StringOptions, + EvaluationRuleOptionsFilterOperator, + ) + + EvaluationRuleFilter_StringOptions( + column="type", + operator=EvaluationRuleOptionsFilterOperator.ANY_OF, + value=["GENERATION"], + ) + """ + + type: typing.Literal["stringOptions"] = "stringOptions" + column: str + operator: EvaluationRuleOptionsFilterOperator + value: typing.List[str] + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + +class EvaluationRuleFilter_CategoryOptions(UniversalBaseModel): + """ + One filter condition used to decide whether a live-ingested target should be evaluated. + + An evaluation rule can include zero or more filter objects. All filters must be satisfied for the target to run. + + How to build a valid filter object: + - Pick the `target` first, because it changes the supported columns. + - Pick the filter `type`. That determines which fields are required. + - Use `key` only for object filters such as `metadata`. + - Use the correct `value` shape for the chosen filter `type`. 
+ + Operator quick reference by filter `type`: + - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with` + - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `stringOptions`: `any of`, `none of` + - `arrayOptions`: `any of`, `none of`, `all of` + - `stringObject`: same operators as `string` + - `null`: `is null`, `is not null` + + Supported columns by target: + - `target=observation` + - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT` + - `name`: `stringOptions`, operators `any of` / `none of` + - `environment`: `stringOptions`, operators `any of` / `none of` + - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR` + - `version`: `string` + - `traceName`: `stringOptions`, operators `any of` / `none of` + - `userId`: `string` + - `sessionId`: `string` + - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `metadata`: `stringObject` with `key` + - `parentObservationId`: `null`, operators `is null` / `is not null` + - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `toolCalls`: `number` + - `target=experiment` + - `datasetId`: `stringOptions`, operators `any of` / `none of` + Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names. + + Recovery guidance: + - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target` + - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided. + - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response. 
+ + Examples + -------- + from langfuse.unstable.commons import ( + EvaluationRuleFilter_StringOptions, + EvaluationRuleOptionsFilterOperator, + ) + + EvaluationRuleFilter_StringOptions( + column="type", + operator=EvaluationRuleOptionsFilterOperator.ANY_OF, + value=["GENERATION"], + ) + """ + + type: typing.Literal["categoryOptions"] = "categoryOptions" + column: str + key: str + operator: EvaluationRuleOptionsFilterOperator + value: typing.List[str] + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + +class EvaluationRuleFilter_ArrayOptions(UniversalBaseModel): + """ + One filter condition used to decide whether a live-ingested target should be evaluated. + + An evaluation rule can include zero or more filter objects. All filters must be satisfied for the target to run. + + How to build a valid filter object: + - Pick the `target` first, because it changes the supported columns. + - Pick the filter `type`. That determines which fields are required. + - Use `key` only for object filters such as `metadata`. + - Use the correct `value` shape for the chosen filter `type`. 
+ + Operator quick reference by filter `type`: + - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with` + - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `stringOptions`: `any of`, `none of` + - `arrayOptions`: `any of`, `none of`, `all of` + - `stringObject`: same operators as `string` + - `null`: `is null`, `is not null` + + Supported columns by target: + - `target=observation` + - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT` + - `name`: `stringOptions`, operators `any of` / `none of` + - `environment`: `stringOptions`, operators `any of` / `none of` + - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR` + - `version`: `string` + - `traceName`: `stringOptions`, operators `any of` / `none of` + - `userId`: `string` + - `sessionId`: `string` + - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `metadata`: `stringObject` with `key` + - `parentObservationId`: `null`, operators `is null` / `is not null` + - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `toolCalls`: `number` + - `target=experiment` + - `datasetId`: `stringOptions`, operators `any of` / `none of` + Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names. + + Recovery guidance: + - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target` + - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided. + - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response. 
+ + Examples + -------- + from langfuse.unstable.commons import ( + EvaluationRuleFilter_StringOptions, + EvaluationRuleOptionsFilterOperator, + ) + + EvaluationRuleFilter_StringOptions( + column="type", + operator=EvaluationRuleOptionsFilterOperator.ANY_OF, + value=["GENERATION"], + ) + """ + + type: typing.Literal["arrayOptions"] = "arrayOptions" + column: str + operator: EvaluationRuleArrayOptionsFilterOperator + value: typing.List[str] + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + +class EvaluationRuleFilter_StringObject(UniversalBaseModel): + """ + One filter condition used to decide whether a live-ingested target should be evaluated. + + An evaluation rule can include zero or more filter objects. All filters must be satisfied for the target to run. + + How to build a valid filter object: + - Pick the `target` first, because it changes the supported columns. + - Pick the filter `type`. That determines which fields are required. + - Use `key` only for object filters such as `metadata`. + - Use the correct `value` shape for the chosen filter `type`. 
+ + Operator quick reference by filter `type`: + - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with` + - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `stringOptions`: `any of`, `none of` + - `arrayOptions`: `any of`, `none of`, `all of` + - `stringObject`: same operators as `string` + - `null`: `is null`, `is not null` + + Supported columns by target: + - `target=observation` + - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT` + - `name`: `stringOptions`, operators `any of` / `none of` + - `environment`: `stringOptions`, operators `any of` / `none of` + - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR` + - `version`: `string` + - `traceName`: `stringOptions`, operators `any of` / `none of` + - `userId`: `string` + - `sessionId`: `string` + - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `metadata`: `stringObject` with `key` + - `parentObservationId`: `null`, operators `is null` / `is not null` + - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `toolCalls`: `number` + - `target=experiment` + - `datasetId`: `stringOptions`, operators `any of` / `none of` + Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names. + + Recovery guidance: + - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target` + - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided. + - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response. 
+ + Examples + -------- + from langfuse.unstable.commons import ( + EvaluationRuleFilter_StringOptions, + EvaluationRuleOptionsFilterOperator, + ) + + EvaluationRuleFilter_StringOptions( + column="type", + operator=EvaluationRuleOptionsFilterOperator.ANY_OF, + value=["GENERATION"], + ) + """ + + type: typing.Literal["stringObject"] = "stringObject" + column: str + key: str + operator: EvaluationRuleStringFilterOperator + value: str + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + +class EvaluationRuleFilter_NumberObject(UniversalBaseModel): + """ + One filter condition used to decide whether a live-ingested target should be evaluated. + + An evaluation rule can include zero or more filter objects. All filters must be satisfied for the target to run. + + How to build a valid filter object: + - Pick the `target` first, because it changes the supported columns. + - Pick the filter `type`. That determines which fields are required. + - Use `key` only for object filters such as `metadata`. + - Use the correct `value` shape for the chosen filter `type`. 
+ + Operator quick reference by filter `type`: + - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with` + - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `stringOptions`: `any of`, `none of` + - `arrayOptions`: `any of`, `none of`, `all of` + - `stringObject`: same operators as `string` + - `null`: `is null`, `is not null` + + Supported columns by target: + - `target=observation` + - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT` + - `name`: `stringOptions`, operators `any of` / `none of` + - `environment`: `stringOptions`, operators `any of` / `none of` + - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR` + - `version`: `string` + - `traceName`: `stringOptions`, operators `any of` / `none of` + - `userId`: `string` + - `sessionId`: `string` + - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `metadata`: `stringObject` with `key` + - `parentObservationId`: `null`, operators `is null` / `is not null` + - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `toolCalls`: `number` + - `target=experiment` + - `datasetId`: `stringOptions`, operators `any of` / `none of` + Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names. + + Recovery guidance: + - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target` + - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided. + - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response. 
+ + Examples + -------- + from langfuse.unstable.commons import ( + EvaluationRuleFilter_StringOptions, + EvaluationRuleOptionsFilterOperator, + ) + + EvaluationRuleFilter_StringOptions( + column="type", + operator=EvaluationRuleOptionsFilterOperator.ANY_OF, + value=["GENERATION"], + ) + """ + + type: typing.Literal["numberObject"] = "numberObject" + column: str + key: str + operator: EvaluationRuleNumberFilterOperator + value: float + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + +class EvaluationRuleFilter_Boolean(UniversalBaseModel): + """ + One filter condition used to decide whether a live-ingested target should be evaluated. + + An evaluation rule can include zero or more filter objects. All filters must be satisfied for the target to run. + + How to build a valid filter object: + - Pick the `target` first, because it changes the supported columns. + - Pick the filter `type`. That determines which fields are required. + - Use `key` only for object filters such as `metadata`. + - Use the correct `value` shape for the chosen filter `type`. 
+ + Operator quick reference by filter `type`: + - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with` + - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `stringOptions`: `any of`, `none of` + - `arrayOptions`: `any of`, `none of`, `all of` + - `stringObject`: same operators as `string` + - `null`: `is null`, `is not null` + + Supported columns by target: + - `target=observation` + - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT` + - `name`: `stringOptions`, operators `any of` / `none of` + - `environment`: `stringOptions`, operators `any of` / `none of` + - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR` + - `version`: `string` + - `traceName`: `stringOptions`, operators `any of` / `none of` + - `userId`: `string` + - `sessionId`: `string` + - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `metadata`: `stringObject` with `key` + - `parentObservationId`: `null`, operators `is null` / `is not null` + - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `toolCalls`: `number` + - `target=experiment` + - `datasetId`: `stringOptions`, operators `any of` / `none of` + Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names. + + Recovery guidance: + - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target` + - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided. + - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response. 
+ + Examples + -------- + from langfuse.unstable.commons import ( + EvaluationRuleFilter_StringOptions, + EvaluationRuleOptionsFilterOperator, + ) + + EvaluationRuleFilter_StringOptions( + column="type", + operator=EvaluationRuleOptionsFilterOperator.ANY_OF, + value=["GENERATION"], + ) + """ + + type: typing.Literal["boolean"] = "boolean" + column: str + operator: EvaluationRuleBooleanFilterOperator + value: bool + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + +class EvaluationRuleFilter_Null(UniversalBaseModel): + """ + One filter condition used to decide whether a live-ingested target should be evaluated. + + An evaluation rule can include zero or more filter objects. All filters must be satisfied for the target to run. + + How to build a valid filter object: + - Pick the `target` first, because it changes the supported columns. + - Pick the filter `type`. That determines which fields are required. + - Use `key` only for object filters such as `metadata`. + - Use the correct `value` shape for the chosen filter `type`. 
+ + Operator quick reference by filter `type`: + - `string`: `"="`, `contains`, `does not contain`, `starts with`, `ends with` + - `number`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `datetime`: `"="`, `">"`, `"<"`, `">="`, `"<="` + - `stringOptions`: `any of`, `none of` + - `arrayOptions`: `any of`, `none of`, `all of` + - `stringObject`: same operators as `string` + - `null`: `is null`, `is not null` + + Supported columns by target: + - `target=observation` + - `type`: `stringOptions`, operators `any of` / `none of`, values `GENERATION`, `SPAN`, `EVENT` + - `name`: `stringOptions`, operators `any of` / `none of` + - `environment`: `stringOptions`, operators `any of` / `none of` + - `level`: `stringOptions`, operators `any of` / `none of`, values `DEBUG`, `DEFAULT`, `WARNING`, `ERROR` + - `version`: `string` + - `traceName`: `stringOptions`, operators `any of` / `none of` + - `userId`: `string` + - `sessionId`: `string` + - `tags`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `metadata`: `stringObject` with `key` + - `parentObservationId`: `null`, operators `is null` / `is not null` + - `calledToolNames`: `arrayOptions`, operators `any of` / `none of` / `all of` + - `toolCalls`: `number` + - `target=experiment` + - `datasetId`: `stringOptions`, operators `any of` / `none of` + Use dataset `id` values from `GET /api/public/v2/datasets`, not dataset names. + + Recovery guidance: + - `invalid_filter_value` with `details.column` but no `invalidValues`: the selected `column` is not supported for the chosen `target` + - `invalid_filter_value` with `details.invalidValues`: the selected values are not allowed for that column. Replace them with one of `details.allowedValues` when provided. + - `invalid_filter_value` for `column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response. 
+ + Examples + -------- + from langfuse.unstable.commons import ( + EvaluationRuleFilter_StringOptions, + EvaluationRuleOptionsFilterOperator, + ) + + EvaluationRuleFilter_StringOptions( + column="type", + operator=EvaluationRuleOptionsFilterOperator.ANY_OF, + value=["GENERATION"], + ) + """ + + type: typing.Literal["null"] = "null" + column: str + operator: EvaluationRuleNullFilterOperator + value: typing.Optional[str] = None + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + +""" +from langfuse.unstable.commons import ( + EvaluationRuleFilter_StringOptions, + EvaluationRuleOptionsFilterOperator, +) + +EvaluationRuleFilter_StringOptions( + column="type", + operator=EvaluationRuleOptionsFilterOperator.ANY_OF, + value=["GENERATION"], +) +""" +EvaluationRuleFilter = typing_extensions.Annotated[ + typing.Union[ + EvaluationRuleFilter_Datetime, + EvaluationRuleFilter_String, + EvaluationRuleFilter_Number, + EvaluationRuleFilter_StringOptions, + EvaluationRuleFilter_CategoryOptions, + EvaluationRuleFilter_ArrayOptions, + EvaluationRuleFilter_StringObject, + EvaluationRuleFilter_NumberObject, + EvaluationRuleFilter_Boolean, + EvaluationRuleFilter_Null, + ], + pydantic.Field(discriminator="type"), +] diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_mapping.py b/langfuse/api/unstable/commons/types/evaluation_rule_mapping.py new file mode 100644 index 000000000..1c407819c --- /dev/null +++ b/langfuse/api/unstable/commons/types/evaluation_rule_mapping.py @@ -0,0 +1,74 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +import pydantic +import typing_extensions +from ....core.pydantic_utilities import UniversalBaseModel +from ....core.serialization import FieldMetadata +from .evaluation_rule_mapping_source import EvaluationRuleMappingSource + + +class EvaluationRuleMapping(UniversalBaseModel): + """ + Maps one evaluator prompt variable to one source field from the target object. + + How to build a valid mapping list: + 1. Create the evaluator or fetch it with `GET /evaluators/{id}`. + 2. Read the evaluator `variables` array. + 3. Add exactly one mapping object for each variable in that array. + 4. Use the variable name exactly as returned, without braces such as `{{` or `}}`. + 5. Choose a `source` that is valid for the selected `target`. + + `jsonPath` is optional. Use it only when the selected source is a JSON object and you want to extract one nested field before inserting it into the evaluator prompt. + + Recovery guidance: + - `invalid_variable_mapping`: the variable name is unknown for this evaluator, or the selected `source` is not valid for the chosen `target` + - `missing_variable_mapping`: one or more evaluator variables are not mapped yet + - `duplicate_variable_mapping`: the same evaluator variable appears more than once + - `invalid_json_path`: the JSONPath expression is malformed. Remove it or correct it. + + Examples + -------- + from langfuse.unstable.commons import ( + EvaluationRuleMapping, + EvaluationRuleMappingSource, + ) + + EvaluationRuleMapping( + variable="input", + source=EvaluationRuleMappingSource.INPUT, + ) + """ + + variable: str = pydantic.Field() + """ + Prompt variable name without braces. + + Example: for the prompt `Judge {{input}} against {{output}}`, use `input` and `output`. + """ + + source: EvaluationRuleMappingSource = pydantic.Field() + """ + Source field that should populate the prompt variable. 
+ + Quick reference: + - `target=observation`: `input`, `output`, `metadata` + - `target=experiment`: `input`, `output`, `metadata`, `expected_output`, `experiment_item_metadata` + """ + + json_path: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="jsonPath") + ] = pydantic.Field(default=None) + """ + Optional JSONPath selector applied to the selected source before it is passed to the evaluator prompt. + + Requirements: + - Must start with `$` + - Must be a syntactically valid JSONPath expression + - Most useful with `source=metadata` + """ + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_mapping_source.py b/langfuse/api/unstable/commons/types/evaluation_rule_mapping_source.py new file mode 100644 index 000000000..391c66bbd --- /dev/null +++ b/langfuse/api/unstable/commons/types/evaluation_rule_mapping_source.py @@ -0,0 +1,51 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +from ....core import enum + +T_Result = typing.TypeVar("T_Result") + + +class EvaluationRuleMappingSource(enum.StrEnum): + """ + Source field used to populate a prompt variable. + + Use these values when mapping evaluator prompt variables to live data. + + Target-specific rules: + - `target=observation` supports `input`, `output`, and `metadata` + - `target=experiment` supports `input`, `output`, `metadata`, `expected_output`, and `experiment_item_metadata` + + Source semantics: + - `input`: the observation or experiment input payload + - `output`: the observation or experiment output payload + - `metadata`: the metadata object for the target. Combine with `jsonPath` when you need one nested field instead of the whole object. + - `expected_output`: the experiment item's expected output. Only valid for `target=experiment`. + - `experiment_item_metadata`: the experiment item's metadata object. 
Only valid for `target=experiment`. + """ + + INPUT = "input" + OUTPUT = "output" + METADATA = "metadata" + EXPECTED_OUTPUT = "expected_output" + EXPERIMENT_ITEM_METADATA = "experiment_item_metadata" + + def visit( + self, + input: typing.Callable[[], T_Result], + output: typing.Callable[[], T_Result], + metadata: typing.Callable[[], T_Result], + expected_output: typing.Callable[[], T_Result], + experiment_item_metadata: typing.Callable[[], T_Result], + ) -> T_Result: + if self is EvaluationRuleMappingSource.INPUT: + return input() + if self is EvaluationRuleMappingSource.OUTPUT: + return output() + if self is EvaluationRuleMappingSource.METADATA: + return metadata() + if self is EvaluationRuleMappingSource.EXPECTED_OUTPUT: + return expected_output() + if self is EvaluationRuleMappingSource.EXPERIMENT_ITEM_METADATA: + return experiment_item_metadata() diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_null_filter_operator.py b/langfuse/api/unstable/commons/types/evaluation_rule_null_filter_operator.py new file mode 100644 index 000000000..833c8406f --- /dev/null +++ b/langfuse/api/unstable/commons/types/evaluation_rule_null_filter_operator.py @@ -0,0 +1,22 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +from ....core import enum + +T_Result = typing.TypeVar("T_Result") + + +class EvaluationRuleNullFilterOperator(enum.StrEnum): + IS_NULL = "is null" + IS_NOT_NULL = "is not null" + + def visit( + self, + is_null: typing.Callable[[], T_Result], + is_not_null: typing.Callable[[], T_Result], + ) -> T_Result: + if self is EvaluationRuleNullFilterOperator.IS_NULL: + return is_null() + if self is EvaluationRuleNullFilterOperator.IS_NOT_NULL: + return is_not_null() diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_number_filter_operator.py b/langfuse/api/unstable/commons/types/evaluation_rule_number_filter_operator.py new file mode 100644 index 000000000..927523e04 --- /dev/null +++ b/langfuse/api/unstable/commons/types/evaluation_rule_number_filter_operator.py @@ -0,0 +1,34 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +from ....core import enum + +T_Result = typing.TypeVar("T_Result") + + +class EvaluationRuleNumberFilterOperator(enum.StrEnum): + EQUALS = "=" + GREATER_THAN = ">" + LESS_THAN = "<" + GREATER_THAN_OR_EQUAL = ">=" + LESS_THAN_OR_EQUAL = "<=" + + def visit( + self, + equals: typing.Callable[[], T_Result], + greater_than: typing.Callable[[], T_Result], + less_than: typing.Callable[[], T_Result], + greater_than_or_equal: typing.Callable[[], T_Result], + less_than_or_equal: typing.Callable[[], T_Result], + ) -> T_Result: + if self is EvaluationRuleNumberFilterOperator.EQUALS: + return equals() + if self is EvaluationRuleNumberFilterOperator.GREATER_THAN: + return greater_than() + if self is EvaluationRuleNumberFilterOperator.LESS_THAN: + return less_than() + if self is EvaluationRuleNumberFilterOperator.GREATER_THAN_OR_EQUAL: + return greater_than_or_equal() + if self is EvaluationRuleNumberFilterOperator.LESS_THAN_OR_EQUAL: + return less_than_or_equal() diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_options_filter_operator.py 
b/langfuse/api/unstable/commons/types/evaluation_rule_options_filter_operator.py new file mode 100644 index 000000000..01cd13ea3 --- /dev/null +++ b/langfuse/api/unstable/commons/types/evaluation_rule_options_filter_operator.py @@ -0,0 +1,22 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +from ....core import enum + +T_Result = typing.TypeVar("T_Result") + + +class EvaluationRuleOptionsFilterOperator(enum.StrEnum): + ANY_OF = "any of" + NONE_OF = "none of" + + def visit( + self, + any_of: typing.Callable[[], T_Result], + none_of: typing.Callable[[], T_Result], + ) -> T_Result: + if self is EvaluationRuleOptionsFilterOperator.ANY_OF: + return any_of() + if self is EvaluationRuleOptionsFilterOperator.NONE_OF: + return none_of() diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_status.py b/langfuse/api/unstable/commons/types/evaluation_rule_status.py new file mode 100644 index 000000000..4a313a962 --- /dev/null +++ b/langfuse/api/unstable/commons/types/evaluation_rule_status.py @@ -0,0 +1,34 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +from ....core import enum + +T_Result = typing.TypeVar("T_Result") + + +class EvaluationRuleStatus(enum.StrEnum): + """ + Effective runtime status of the evaluation rule. + + - `active`: enabled and currently runnable. + - `inactive`: disabled by configuration. + - `paused`: enabled, but Langfuse has blocked execution until the underlying issue is resolved. 
+ """ + + ACTIVE = "active" + INACTIVE = "inactive" + PAUSED = "paused" + + def visit( + self, + active: typing.Callable[[], T_Result], + inactive: typing.Callable[[], T_Result], + paused: typing.Callable[[], T_Result], + ) -> T_Result: + if self is EvaluationRuleStatus.ACTIVE: + return active() + if self is EvaluationRuleStatus.INACTIVE: + return inactive() + if self is EvaluationRuleStatus.PAUSED: + return paused() diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_string_filter_operator.py b/langfuse/api/unstable/commons/types/evaluation_rule_string_filter_operator.py new file mode 100644 index 000000000..9955172b9 --- /dev/null +++ b/langfuse/api/unstable/commons/types/evaluation_rule_string_filter_operator.py @@ -0,0 +1,34 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +from ....core import enum + +T_Result = typing.TypeVar("T_Result") + + +class EvaluationRuleStringFilterOperator(enum.StrEnum): + EQUALS = "=" + CONTAINS = "contains" + DOES_NOT_CONTAIN = "does not contain" + STARTS_WITH = "starts with" + ENDS_WITH = "ends with" + + def visit( + self, + equals: typing.Callable[[], T_Result], + contains: typing.Callable[[], T_Result], + does_not_contain: typing.Callable[[], T_Result], + starts_with: typing.Callable[[], T_Result], + ends_with: typing.Callable[[], T_Result], + ) -> T_Result: + if self is EvaluationRuleStringFilterOperator.EQUALS: + return equals() + if self is EvaluationRuleStringFilterOperator.CONTAINS: + return contains() + if self is EvaluationRuleStringFilterOperator.DOES_NOT_CONTAIN: + return does_not_contain() + if self is EvaluationRuleStringFilterOperator.STARTS_WITH: + return starts_with() + if self is EvaluationRuleStringFilterOperator.ENDS_WITH: + return ends_with() diff --git a/langfuse/api/unstable/commons/types/evaluation_rule_target.py b/langfuse/api/unstable/commons/types/evaluation_rule_target.py new file mode 100644 index 000000000..186aa461c --- /dev/null +++ 
b/langfuse/api/unstable/commons/types/evaluation_rule_target.py @@ -0,0 +1,33 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +from ....core import enum + +T_Result = typing.TypeVar("T_Result") + + +class EvaluationRuleTarget(enum.StrEnum): + """ + The ingestion object type that should trigger evaluation runs. + + Choose the target first, because it changes both the valid filter columns and the valid variable-mapping sources: + - `observation` evaluates live-ingested observations such as generations, spans, and events. + It supports mapping from `input`, `output`, and `metadata`. + - `experiment` evaluates live experiment executions and can additionally map `expected_output` and `experiment_item_metadata`. + It currently supports filtering by `datasetId`. + Discover valid dataset IDs with `GET /api/public/v2/datasets`, then use the returned dataset `id` values in your filter. + """ + + OBSERVATION = "observation" + EXPERIMENT = "experiment" + + def visit( + self, + observation: typing.Callable[[], T_Result], + experiment: typing.Callable[[], T_Result], + ) -> T_Result: + if self is EvaluationRuleTarget.OBSERVATION: + return observation() + if self is EvaluationRuleTarget.EXPERIMENT: + return experiment() diff --git a/langfuse/api/unstable/commons/types/evaluator_model_config.py b/langfuse/api/unstable/commons/types/evaluator_model_config.py new file mode 100644 index 000000000..5473cca8f --- /dev/null +++ b/langfuse/api/unstable/commons/types/evaluator_model_config.py @@ -0,0 +1,46 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ....core.pydantic_utilities import UniversalBaseModel + + +class EvaluatorModelConfig(UniversalBaseModel): + """ + Optional explicit model configuration for an evaluator. + + If omitted, Langfuse uses the project's default evaluation model. 
+ If provided, the model must be available to the project when the evaluator or evaluation rule is enabled. + + To discover valid configured `provider` values for a project, call `GET /api/public/llm-connections` and read the `provider` field from the returned connections. + Use a `provider` value that matches one of the connections already configured in the same project. + + Recovery guidance: + - If evaluator creation returns `422` with `code=evaluator_preflight_failed`, either provide a valid explicit `modelConfig` here or configure the project's default evaluation model, then retry the same request. + + Examples + -------- + from langfuse.unstable.commons import EvaluatorModelConfig + + EvaluatorModelConfig( + provider="openai", + model="gpt-4.1-mini", + ) + """ + + provider: str = pydantic.Field() + """ + Provider identifier to use for this evaluator, for example `openai` or `anthropic`. + + To discover valid values for the current project, call `GET /api/public/llm-connections` and use one of the returned `provider` values. + """ + + model: str = pydantic.Field() + """ + Model identifier exposed by the provider, for example `gpt-4.1-mini`. + """ + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/commons/types/evaluator_output_data_type.py b/langfuse/api/unstable/commons/types/evaluator_output_data_type.py new file mode 100644 index 000000000..a6c309868 --- /dev/null +++ b/langfuse/api/unstable/commons/types/evaluator_output_data_type.py @@ -0,0 +1,35 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +from ....core import enum + +T_Result = typing.TypeVar("T_Result") + + +class EvaluatorOutputDataType(enum.StrEnum): + """ + Structured score type returned by an evaluator. 
+ + This controls the type of score value Langfuse stores for evaluation results: + - `NUMERIC`: a numeric score such as `0.82` + - `BOOLEAN`: a boolean score such as `true` + - `CATEGORICAL`: one or more category labels from a fixed list + """ + + NUMERIC = "NUMERIC" + BOOLEAN = "BOOLEAN" + CATEGORICAL = "CATEGORICAL" + + def visit( + self, + numeric: typing.Callable[[], T_Result], + boolean: typing.Callable[[], T_Result], + categorical: typing.Callable[[], T_Result], + ) -> T_Result: + if self is EvaluatorOutputDataType.NUMERIC: + return numeric() + if self is EvaluatorOutputDataType.BOOLEAN: + return boolean() + if self is EvaluatorOutputDataType.CATEGORICAL: + return categorical() diff --git a/langfuse/api/unstable/commons/types/evaluator_output_definition.py b/langfuse/api/unstable/commons/types/evaluator_output_definition.py new file mode 100644 index 000000000..f545a19a8 --- /dev/null +++ b/langfuse/api/unstable/commons/types/evaluator_output_definition.py @@ -0,0 +1,161 @@ +# This file was auto-generated by Fern from our API Definition. + +from __future__ import annotations + +import typing + +import pydantic +import typing_extensions +from ....core.pydantic_utilities import UniversalBaseModel +from ....core.serialization import FieldMetadata +from .evaluator_output_field_definition import EvaluatorOutputFieldDefinition +from .public_categorical_evaluator_output_score_definition import ( + PublicCategoricalEvaluatorOutputScoreDefinition, +) + + +class EvaluatorOutputDefinition_Numeric(UniversalBaseModel): + """ + Structured output definition to send when creating an evaluator. + + Agent guidance: + - `dataType` is required. + - Do not send `version`; that is an internal storage detail and is not part of the public request contract. + - For `NUMERIC` and `BOOLEAN`, provide `reasoning.description` and `score.description`. + - For `CATEGORICAL`, also provide `score.categories` and `score.shouldAllowMultipleMatches`. 
+ + Examples + -------- + from langfuse.unstable.commons import ( + EvaluatorOutputDataType, + EvaluatorOutputDefinition_Numeric, + EvaluatorOutputFieldDefinition, + ) + + EvaluatorOutputDefinition_Numeric( + data_type=EvaluatorOutputDataType.NUMERIC, + reasoning=EvaluatorOutputFieldDefinition( + description="Explain why the answer is correct or incorrect.", + ), + score=EvaluatorOutputFieldDefinition( + description="Return a score between 0 and 1.", + ), + ) + """ + + data_type: typing_extensions.Annotated[ + typing.Literal["NUMERIC"], FieldMetadata(alias="dataType") + ] = "NUMERIC" + reasoning: EvaluatorOutputFieldDefinition + score: EvaluatorOutputFieldDefinition + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + +class EvaluatorOutputDefinition_Boolean(UniversalBaseModel): + """ + Structured output definition to send when creating an evaluator. + + Agent guidance: + - `dataType` is required. + - Do not send `version`; that is an internal storage detail and is not part of the public request contract. + - For `NUMERIC` and `BOOLEAN`, provide `reasoning.description` and `score.description`. + - For `CATEGORICAL`, also provide `score.categories` and `score.shouldAllowMultipleMatches`. 
+ + Examples + -------- + from langfuse.unstable.commons import ( + EvaluatorOutputDataType, + EvaluatorOutputDefinition_Numeric, + EvaluatorOutputFieldDefinition, + ) + + EvaluatorOutputDefinition_Numeric( + data_type=EvaluatorOutputDataType.NUMERIC, + reasoning=EvaluatorOutputFieldDefinition( + description="Explain why the answer is correct or incorrect.", + ), + score=EvaluatorOutputFieldDefinition( + description="Return a score between 0 and 1.", + ), + ) + """ + + data_type: typing_extensions.Annotated[ + typing.Literal["BOOLEAN"], FieldMetadata(alias="dataType") + ] = "BOOLEAN" + reasoning: EvaluatorOutputFieldDefinition + score: EvaluatorOutputFieldDefinition + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + +class EvaluatorOutputDefinition_Categorical(UniversalBaseModel): + """ + Structured output definition to send when creating an evaluator. + + Agent guidance: + - `dataType` is required. + - Do not send `version`; that is an internal storage detail and is not part of the public request contract. + - For `NUMERIC` and `BOOLEAN`, provide `reasoning.description` and `score.description`. + - For `CATEGORICAL`, also provide `score.categories` and `score.shouldAllowMultipleMatches`. 
+ + Examples + -------- + from langfuse.unstable.commons import ( + EvaluatorOutputDataType, + EvaluatorOutputDefinition_Numeric, + EvaluatorOutputFieldDefinition, + ) + + EvaluatorOutputDefinition_Numeric( + data_type=EvaluatorOutputDataType.NUMERIC, + reasoning=EvaluatorOutputFieldDefinition( + description="Explain why the answer is correct or incorrect.", + ), + score=EvaluatorOutputFieldDefinition( + description="Return a score between 0 and 1.", + ), + ) + """ + + data_type: typing_extensions.Annotated[ + typing.Literal["CATEGORICAL"], FieldMetadata(alias="dataType") + ] = "CATEGORICAL" + reasoning: EvaluatorOutputFieldDefinition + score: PublicCategoricalEvaluatorOutputScoreDefinition + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + +""" +from langfuse.unstable.commons import ( + EvaluatorOutputDataType, + EvaluatorOutputDefinition_Numeric, + EvaluatorOutputFieldDefinition, +) + +EvaluatorOutputDefinition_Numeric( + data_type=EvaluatorOutputDataType.NUMERIC, + reasoning=EvaluatorOutputFieldDefinition( + description="Explain why the answer is correct or incorrect.", + ), + score=EvaluatorOutputFieldDefinition( + description="Return a score between 0 and 1.", + ), +) +""" +EvaluatorOutputDefinition = typing_extensions.Annotated[ + typing.Union[ + EvaluatorOutputDefinition_Numeric, + EvaluatorOutputDefinition_Boolean, + EvaluatorOutputDefinition_Categorical, + ], + pydantic.Field(discriminator="data_type"), +] diff --git a/langfuse/api/unstable/commons/types/evaluator_output_field_definition.py b/langfuse/api/unstable/commons/types/evaluator_output_field_definition.py new file mode 100644 index 000000000..419610d0a --- /dev/null +++ b/langfuse/api/unstable/commons/types/evaluator_output_field_definition.py @@ -0,0 +1,17 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +import pydantic +from ....core.pydantic_utilities import UniversalBaseModel + + +class EvaluatorOutputFieldDefinition(UniversalBaseModel): + description: str = pydantic.Field() + """ + Human-readable instructions for what the evaluator should return in this field. + """ + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/commons/types/evaluator_scope.py b/langfuse/api/unstable/commons/types/evaluator_scope.py new file mode 100644 index 000000000..7ce796418 --- /dev/null +++ b/langfuse/api/unstable/commons/types/evaluator_scope.py @@ -0,0 +1,29 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +from ....core import enum + +T_Result = typing.TypeVar("T_Result") + + +class EvaluatorScope(enum.StrEnum): + """ + Where an evaluator comes from. + + - `project`: created in your project + - `managed`: provided by Langfuse + """ + + PROJECT = "project" + MANAGED = "managed" + + def visit( + self, + project: typing.Callable[[], T_Result], + managed: typing.Callable[[], T_Result], + ) -> T_Result: + if self is EvaluatorScope.PROJECT: + return project() + if self is EvaluatorScope.MANAGED: + return managed() diff --git a/langfuse/api/unstable/commons/types/evaluator_type.py b/langfuse/api/unstable/commons/types/evaluator_type.py new file mode 100644 index 000000000..d411d6111 --- /dev/null +++ b/langfuse/api/unstable/commons/types/evaluator_type.py @@ -0,0 +1,21 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +from ....core import enum + +T_Result = typing.TypeVar("T_Result") + + +class EvaluatorType(enum.StrEnum): + """ + The evaluator engine type. + + The unstable public API currently supports only LLM-as-a-judge evaluators. 
+ """ + + LLM_AS_JUDGE = "llm_as_judge" + + def visit(self, llm_as_judge: typing.Callable[[], T_Result]) -> T_Result: + if self is EvaluatorType.LLM_AS_JUDGE: + return llm_as_judge() diff --git a/langfuse/api/unstable/commons/types/null_evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/null_evaluation_rule_filter.py new file mode 100644 index 000000000..d224d7590 --- /dev/null +++ b/langfuse/api/unstable/commons/types/null_evaluation_rule_filter.py @@ -0,0 +1,24 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ....core.pydantic_utilities import UniversalBaseModel +from .evaluation_rule_null_filter_operator import EvaluationRuleNullFilterOperator + + +class NullEvaluationRuleFilter(UniversalBaseModel): + column: str = pydantic.Field() + """ + Column to filter on. In the unstable public API this is currently `parentObservationId`. + """ + + operator: EvaluationRuleNullFilterOperator + value: typing.Optional[str] = pydantic.Field(default=None) + """ + Ignored placeholder value. Clients may omit it or send an empty string. + """ + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/commons/types/number_evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/number_evaluation_rule_filter.py new file mode 100644 index 000000000..f9c489291 --- /dev/null +++ b/langfuse/api/unstable/commons/types/number_evaluation_rule_filter.py @@ -0,0 +1,21 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ....core.pydantic_utilities import UniversalBaseModel +from .evaluation_rule_number_filter_operator import EvaluationRuleNumberFilterOperator + + +class NumberEvaluationRuleFilter(UniversalBaseModel): + column: str = pydantic.Field() + """ + Column to filter on. 
+ """ + + operator: EvaluationRuleNumberFilterOperator + value: float + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/commons/types/number_object_evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/number_object_evaluation_rule_filter.py new file mode 100644 index 000000000..fd9462174 --- /dev/null +++ b/langfuse/api/unstable/commons/types/number_object_evaluation_rule_filter.py @@ -0,0 +1,26 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ....core.pydantic_utilities import UniversalBaseModel +from .evaluation_rule_number_filter_operator import EvaluationRuleNumberFilterOperator + + +class NumberObjectEvaluationRuleFilter(UniversalBaseModel): + column: str = pydantic.Field() + """ + Object-valued column to filter on. + """ + + key: str = pydantic.Field() + """ + Key inside the object-valued column to filter on. + """ + + operator: EvaluationRuleNumberFilterOperator + value: float + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/commons/types/public_boolean_evaluator_output_definition.py b/langfuse/api/unstable/commons/types/public_boolean_evaluator_output_definition.py new file mode 100644 index 000000000..7baaf209a --- /dev/null +++ b/langfuse/api/unstable/commons/types/public_boolean_evaluator_output_definition.py @@ -0,0 +1,26 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +import pydantic +import typing_extensions +from ....core.pydantic_utilities import UniversalBaseModel +from ....core.serialization import FieldMetadata +from .evaluator_output_data_type import EvaluatorOutputDataType +from .evaluator_output_field_definition import EvaluatorOutputFieldDefinition + + +class PublicBooleanEvaluatorOutputDefinition(UniversalBaseModel): + data_type: typing_extensions.Annotated[ + EvaluatorOutputDataType, FieldMetadata(alias="dataType") + ] = pydantic.Field() + """ + Always `BOOLEAN`. + """ + + reasoning: EvaluatorOutputFieldDefinition + score: EvaluatorOutputFieldDefinition + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/commons/types/public_categorical_evaluator_output_definition.py b/langfuse/api/unstable/commons/types/public_categorical_evaluator_output_definition.py new file mode 100644 index 000000000..30d4673bb --- /dev/null +++ b/langfuse/api/unstable/commons/types/public_categorical_evaluator_output_definition.py @@ -0,0 +1,29 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +import typing_extensions +from ....core.pydantic_utilities import UniversalBaseModel +from ....core.serialization import FieldMetadata +from .evaluator_output_data_type import EvaluatorOutputDataType +from .evaluator_output_field_definition import EvaluatorOutputFieldDefinition +from .public_categorical_evaluator_output_score_definition import ( + PublicCategoricalEvaluatorOutputScoreDefinition, +) + + +class PublicCategoricalEvaluatorOutputDefinition(UniversalBaseModel): + data_type: typing_extensions.Annotated[ + EvaluatorOutputDataType, FieldMetadata(alias="dataType") + ] = pydantic.Field() + """ + Always `CATEGORICAL`. 
+ """ + + reasoning: EvaluatorOutputFieldDefinition + score: PublicCategoricalEvaluatorOutputScoreDefinition + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/commons/types/public_categorical_evaluator_output_score_definition.py b/langfuse/api/unstable/commons/types/public_categorical_evaluator_output_score_definition.py new file mode 100644 index 000000000..81deadb93 --- /dev/null +++ b/langfuse/api/unstable/commons/types/public_categorical_evaluator_output_score_definition.py @@ -0,0 +1,20 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +import typing_extensions +from ....core.pydantic_utilities import UniversalBaseModel +from ....core.serialization import FieldMetadata + + +class PublicCategoricalEvaluatorOutputScoreDefinition(UniversalBaseModel): + description: str + categories: typing.List[str] + should_allow_multiple_matches: typing_extensions.Annotated[ + bool, FieldMetadata(alias="shouldAllowMultipleMatches") + ] + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/commons/types/public_evaluator_output_definition.py b/langfuse/api/unstable/commons/types/public_evaluator_output_definition.py new file mode 100644 index 000000000..43c7aa9ba --- /dev/null +++ b/langfuse/api/unstable/commons/types/public_evaluator_output_definition.py @@ -0,0 +1,167 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +from __future__ import annotations + +import typing + +import pydantic +import typing_extensions +from ....core.pydantic_utilities import UniversalBaseModel +from ....core.serialization import FieldMetadata +from .evaluator_output_field_definition import EvaluatorOutputFieldDefinition +from .public_categorical_evaluator_output_score_definition import ( + PublicCategoricalEvaluatorOutputScoreDefinition, +) + + +class PublicEvaluatorOutputDefinition_Numeric(UniversalBaseModel): + """ + Evaluator output definition returned by the public API. + + This response always includes `dataType` and never includes an internal output-definition `version`. + Legacy stored evaluator definitions are normalized into this shape before they are returned. + + Use this response shape when deciding how to interpret future evaluation scores: + - `NUMERIC`: expect numeric score values + - `BOOLEAN`: expect `true` / `false` + - `CATEGORICAL`: expect one or more values from `score.categories` + + Examples + -------- + from langfuse.unstable.commons import ( + EvaluatorOutputDataType, + EvaluatorOutputFieldDefinition, + PublicEvaluatorOutputDefinition_Numeric, + ) + + PublicEvaluatorOutputDefinition_Numeric( + data_type=EvaluatorOutputDataType.NUMERIC, + reasoning=EvaluatorOutputFieldDefinition( + description="Explain why the answer is correct or incorrect.", + ), + score=EvaluatorOutputFieldDefinition( + description="Return a score between 0 and 1.", + ), + ) + """ + + data_type: typing_extensions.Annotated[ + typing.Literal["NUMERIC"], FieldMetadata(alias="dataType") + ] = "NUMERIC" + reasoning: EvaluatorOutputFieldDefinition + score: EvaluatorOutputFieldDefinition + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + +class PublicEvaluatorOutputDefinition_Boolean(UniversalBaseModel): + """ + Evaluator output definition returned by the public API. 
+ + This response always includes `dataType` and never includes an internal output-definition `version`. + Legacy stored evaluator definitions are normalized into this shape before they are returned. + + Use this response shape when deciding how to interpret future evaluation scores: + - `NUMERIC`: expect numeric score values + - `BOOLEAN`: expect `true` / `false` + - `CATEGORICAL`: expect one or more values from `score.categories` + + Examples + -------- + from langfuse.unstable.commons import ( + EvaluatorOutputDataType, + EvaluatorOutputFieldDefinition, + PublicEvaluatorOutputDefinition_Numeric, + ) + + PublicEvaluatorOutputDefinition_Numeric( + data_type=EvaluatorOutputDataType.NUMERIC, + reasoning=EvaluatorOutputFieldDefinition( + description="Explain why the answer is correct or incorrect.", + ), + score=EvaluatorOutputFieldDefinition( + description="Return a score between 0 and 1.", + ), + ) + """ + + data_type: typing_extensions.Annotated[ + typing.Literal["BOOLEAN"], FieldMetadata(alias="dataType") + ] = "BOOLEAN" + reasoning: EvaluatorOutputFieldDefinition + score: EvaluatorOutputFieldDefinition + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + +class PublicEvaluatorOutputDefinition_Categorical(UniversalBaseModel): + """ + Evaluator output definition returned by the public API. + + This response always includes `dataType` and never includes an internal output-definition `version`. + Legacy stored evaluator definitions are normalized into this shape before they are returned. 
+ + Use this response shape when deciding how to interpret future evaluation scores: + - `NUMERIC`: expect numeric score values + - `BOOLEAN`: expect `true` / `false` + - `CATEGORICAL`: expect one or more values from `score.categories` + + Examples + -------- + from langfuse.unstable.commons import ( + EvaluatorOutputDataType, + EvaluatorOutputFieldDefinition, + PublicEvaluatorOutputDefinition_Numeric, + ) + + PublicEvaluatorOutputDefinition_Numeric( + data_type=EvaluatorOutputDataType.NUMERIC, + reasoning=EvaluatorOutputFieldDefinition( + description="Explain why the answer is correct or incorrect.", + ), + score=EvaluatorOutputFieldDefinition( + description="Return a score between 0 and 1.", + ), + ) + """ + + data_type: typing_extensions.Annotated[ + typing.Literal["CATEGORICAL"], FieldMetadata(alias="dataType") + ] = "CATEGORICAL" + reasoning: EvaluatorOutputFieldDefinition + score: PublicCategoricalEvaluatorOutputScoreDefinition + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) + + +""" +from langfuse.unstable.commons import ( + EvaluatorOutputDataType, + EvaluatorOutputFieldDefinition, + PublicEvaluatorOutputDefinition_Numeric, +) + +PublicEvaluatorOutputDefinition_Numeric( + data_type=EvaluatorOutputDataType.NUMERIC, + reasoning=EvaluatorOutputFieldDefinition( + description="Explain why the answer is correct or incorrect.", + ), + score=EvaluatorOutputFieldDefinition( + description="Return a score between 0 and 1.", + ), +) +""" +PublicEvaluatorOutputDefinition = typing_extensions.Annotated[ + typing.Union[ + PublicEvaluatorOutputDefinition_Numeric, + PublicEvaluatorOutputDefinition_Boolean, + PublicEvaluatorOutputDefinition_Categorical, + ], + pydantic.Field(discriminator="data_type"), +] diff --git a/langfuse/api/unstable/commons/types/public_numeric_evaluator_output_definition.py b/langfuse/api/unstable/commons/types/public_numeric_evaluator_output_definition.py new file mode 100644 index 
000000000..68987d2ff --- /dev/null +++ b/langfuse/api/unstable/commons/types/public_numeric_evaluator_output_definition.py @@ -0,0 +1,26 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +import typing_extensions +from ....core.pydantic_utilities import UniversalBaseModel +from ....core.serialization import FieldMetadata +from .evaluator_output_data_type import EvaluatorOutputDataType +from .evaluator_output_field_definition import EvaluatorOutputFieldDefinition + + +class PublicNumericEvaluatorOutputDefinition(UniversalBaseModel): + data_type: typing_extensions.Annotated[ + EvaluatorOutputDataType, FieldMetadata(alias="dataType") + ] = pydantic.Field() + """ + Always `NUMERIC`. + """ + + reasoning: EvaluatorOutputFieldDefinition + score: EvaluatorOutputFieldDefinition + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/commons/types/string_evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/string_evaluation_rule_filter.py new file mode 100644 index 000000000..bd9332092 --- /dev/null +++ b/langfuse/api/unstable/commons/types/string_evaluation_rule_filter.py @@ -0,0 +1,21 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ....core.pydantic_utilities import UniversalBaseModel +from .evaluation_rule_string_filter_operator import EvaluationRuleStringFilterOperator + + +class StringEvaluationRuleFilter(UniversalBaseModel): + column: str = pydantic.Field() + """ + Column to filter on. 
+ """ + + operator: EvaluationRuleStringFilterOperator + value: str + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/commons/types/string_object_evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/string_object_evaluation_rule_filter.py new file mode 100644 index 000000000..6c287aad6 --- /dev/null +++ b/langfuse/api/unstable/commons/types/string_object_evaluation_rule_filter.py @@ -0,0 +1,26 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ....core.pydantic_utilities import UniversalBaseModel +from .evaluation_rule_string_filter_operator import EvaluationRuleStringFilterOperator + + +class StringObjectEvaluationRuleFilter(UniversalBaseModel): + column: str = pydantic.Field() + """ + Object-valued column to filter on. In the unstable public API this is currently `metadata`. + """ + + key: str = pydantic.Field() + """ + Top-level key inside the object-valued column to filter on. + """ + + operator: EvaluationRuleStringFilterOperator + value: str + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/commons/types/string_options_evaluation_rule_filter.py b/langfuse/api/unstable/commons/types/string_options_evaluation_rule_filter.py new file mode 100644 index 000000000..a830e5ad9 --- /dev/null +++ b/langfuse/api/unstable/commons/types/string_options_evaluation_rule_filter.py @@ -0,0 +1,24 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ....core.pydantic_utilities import UniversalBaseModel +from .evaluation_rule_options_filter_operator import EvaluationRuleOptionsFilterOperator + + +class StringOptionsEvaluationRuleFilter(UniversalBaseModel): + column: str = pydantic.Field() + """ + Column to filter on. 
+ """ + + operator: EvaluationRuleOptionsFilterOperator + value: typing.List[str] = pydantic.Field() + """ + One or more allowed string values. + """ + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/errors/__init__.py b/langfuse/api/unstable/errors/__init__.py new file mode 100644 index 000000000..42f230c41 --- /dev/null +++ b/langfuse/api/unstable/errors/__init__.py @@ -0,0 +1,84 @@ +# This file was auto-generated by Fern from our API Definition. + +# isort: skip_file + +import typing +from importlib import import_module + +if typing.TYPE_CHECKING: + from .types import ( + PublicApiError, + PublicApiErrorCode, + PublicApiErrorDetails, + PublicApiValidationIssue, + ) + from .errors import ( + AccessDeniedError, + BadRequestError, + ConflictError, + InternalServerError, + MethodNotAllowedError, + NotFoundError, + TooManyRequestsError, + UnauthorizedError, + UnprocessableContentError, + ) +_dynamic_imports: typing.Dict[str, str] = { + "AccessDeniedError": ".errors", + "BadRequestError": ".errors", + "ConflictError": ".errors", + "InternalServerError": ".errors", + "MethodNotAllowedError": ".errors", + "NotFoundError": ".errors", + "PublicApiError": ".types", + "PublicApiErrorCode": ".types", + "PublicApiErrorDetails": ".types", + "PublicApiValidationIssue": ".types", + "TooManyRequestsError": ".errors", + "UnauthorizedError": ".errors", + "UnprocessableContentError": ".errors", +} + + +def __getattr__(attr_name: str) -> typing.Any: + module_name = _dynamic_imports.get(attr_name) + if module_name is None: + raise AttributeError( + f"No {attr_name} found in _dynamic_imports for module name -> {__name__}" + ) + try: + module = import_module(module_name, __package__) + if module_name == f".{attr_name}": + return module + else: + return getattr(module, attr_name) + except ImportError as e: + raise ImportError( + f"Failed to import {attr_name} from {module_name}: {e}" + ) from 
e + except AttributeError as e: + raise AttributeError( + f"Failed to get {attr_name} from {module_name}: {e}" + ) from e + + +def __dir__(): + lazy_attrs = list(_dynamic_imports.keys()) + return sorted(lazy_attrs) + + +__all__ = [ + "AccessDeniedError", + "BadRequestError", + "ConflictError", + "InternalServerError", + "MethodNotAllowedError", + "NotFoundError", + "PublicApiError", + "PublicApiErrorCode", + "PublicApiErrorDetails", + "PublicApiValidationIssue", + "TooManyRequestsError", + "UnauthorizedError", + "UnprocessableContentError", +] diff --git a/langfuse/api/unstable/errors/errors/__init__.py b/langfuse/api/unstable/errors/errors/__init__.py new file mode 100644 index 000000000..510e3beb1 --- /dev/null +++ b/langfuse/api/unstable/errors/errors/__init__.py @@ -0,0 +1,68 @@ +# This file was auto-generated by Fern from our API Definition. + +# isort: skip_file + +import typing +from importlib import import_module + +if typing.TYPE_CHECKING: + from .access_denied_error import AccessDeniedError + from .bad_request_error import BadRequestError + from .conflict_error import ConflictError + from .internal_server_error import InternalServerError + from .method_not_allowed_error import MethodNotAllowedError + from .not_found_error import NotFoundError + from .too_many_requests_error import TooManyRequestsError + from .unauthorized_error import UnauthorizedError + from .unprocessable_content_error import UnprocessableContentError +_dynamic_imports: typing.Dict[str, str] = { + "AccessDeniedError": ".access_denied_error", + "BadRequestError": ".bad_request_error", + "ConflictError": ".conflict_error", + "InternalServerError": ".internal_server_error", + "MethodNotAllowedError": ".method_not_allowed_error", + "NotFoundError": ".not_found_error", + "TooManyRequestsError": ".too_many_requests_error", + "UnauthorizedError": ".unauthorized_error", + "UnprocessableContentError": ".unprocessable_content_error", +} + + +def __getattr__(attr_name: str) -> typing.Any: + 
module_name = _dynamic_imports.get(attr_name) + if module_name is None: + raise AttributeError( + f"No {attr_name} found in _dynamic_imports for module name -> {__name__}" + ) + try: + module = import_module(module_name, __package__) + if module_name == f".{attr_name}": + return module + else: + return getattr(module, attr_name) + except ImportError as e: + raise ImportError( + f"Failed to import {attr_name} from {module_name}: {e}" + ) from e + except AttributeError as e: + raise AttributeError( + f"Failed to get {attr_name} from {module_name}: {e}" + ) from e + + +def __dir__(): + lazy_attrs = list(_dynamic_imports.keys()) + return sorted(lazy_attrs) + + +__all__ = [ + "AccessDeniedError", + "BadRequestError", + "ConflictError", + "InternalServerError", + "MethodNotAllowedError", + "NotFoundError", + "TooManyRequestsError", + "UnauthorizedError", + "UnprocessableContentError", +] diff --git a/langfuse/api/unstable/errors/errors/access_denied_error.py b/langfuse/api/unstable/errors/errors/access_denied_error.py new file mode 100644 index 000000000..6e07b4c79 --- /dev/null +++ b/langfuse/api/unstable/errors/errors/access_denied_error.py @@ -0,0 +1,15 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +from ....core.api_error import ApiError +from ..types.public_api_error import PublicApiError + + +class AccessDeniedError(ApiError): + def __init__( + self, + body: PublicApiError, + headers: typing.Optional[typing.Dict[str, str]] = None, + ): + super().__init__(status_code=403, headers=headers, body=body) diff --git a/langfuse/api/unstable/errors/errors/bad_request_error.py b/langfuse/api/unstable/errors/errors/bad_request_error.py new file mode 100644 index 000000000..7ba4c1a00 --- /dev/null +++ b/langfuse/api/unstable/errors/errors/bad_request_error.py @@ -0,0 +1,15 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +from ....core.api_error import ApiError +from ..types.public_api_error import PublicApiError + + +class BadRequestError(ApiError): + def __init__( + self, + body: PublicApiError, + headers: typing.Optional[typing.Dict[str, str]] = None, + ): + super().__init__(status_code=400, headers=headers, body=body) diff --git a/langfuse/api/unstable/errors/errors/conflict_error.py b/langfuse/api/unstable/errors/errors/conflict_error.py new file mode 100644 index 000000000..3630eec67 --- /dev/null +++ b/langfuse/api/unstable/errors/errors/conflict_error.py @@ -0,0 +1,15 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +from ....core.api_error import ApiError +from ..types.public_api_error import PublicApiError + + +class ConflictError(ApiError): + def __init__( + self, + body: PublicApiError, + headers: typing.Optional[typing.Dict[str, str]] = None, + ): + super().__init__(status_code=409, headers=headers, body=body) diff --git a/langfuse/api/unstable/errors/errors/internal_server_error.py b/langfuse/api/unstable/errors/errors/internal_server_error.py new file mode 100644 index 000000000..5921a86ae --- /dev/null +++ b/langfuse/api/unstable/errors/errors/internal_server_error.py @@ -0,0 +1,15 @@ +# This file was auto-generated by Fern from our API Definition. 
import typing

from ....core.api_error import ApiError
from ..types.public_api_error import PublicApiError


class InternalServerError(ApiError):
    """`ApiError` subclass that always reports status code 500 and carries a `PublicApiError` body."""

    def __init__(
        self,
        body: PublicApiError,
        headers: typing.Optional[typing.Dict[str, str]] = None,
    ) -> None:
        """
        Parameters
        ----------
        body : PublicApiError
            Error payload handed to `ApiError` as `body`.

        headers : typing.Optional[typing.Dict[str, str]]
            Headers handed to `ApiError`; `None` when omitted.
        """
        # The status code is fixed by the class; callers only supply body and headers.
        super().__init__(body=body, headers=headers, status_code=500)


# diff --git a/langfuse/api/unstable/errors/errors/method_not_allowed_error.py b/langfuse/api/unstable/errors/errors/method_not_allowed_error.py
# new file mode 100644
# index 000000000..547598806
# --- /dev/null
# +++ b/langfuse/api/unstable/errors/errors/method_not_allowed_error.py
# @@ -0,0 +1,15 @@
# This file was auto-generated by Fern from our API Definition.

import typing

from ....core.api_error import ApiError
from ..types.public_api_error import PublicApiError


class MethodNotAllowedError(ApiError):
    """`ApiError` subclass that always reports status code 405 and carries a `PublicApiError` body."""

    def __init__(
        self,
        body: PublicApiError,
        headers: typing.Optional[typing.Dict[str, str]] = None,
    ) -> None:
        """
        Parameters
        ----------
        body : PublicApiError
            Error payload handed to `ApiError` as `body`.

        headers : typing.Optional[typing.Dict[str, str]]
            Headers handed to `ApiError`; `None` when omitted.
        """
        # The status code is fixed by the class; callers only supply body and headers.
        super().__init__(body=body, headers=headers, status_code=405)


# diff --git a/langfuse/api/unstable/errors/errors/not_found_error.py b/langfuse/api/unstable/errors/errors/not_found_error.py
# new file mode 100644
# index 000000000..1b65b230e
# --- /dev/null
# +++ b/langfuse/api/unstable/errors/errors/not_found_error.py
# @@ -0,0 +1,15 @@
# This file was auto-generated by Fern from our API Definition.
import typing

from ....core.api_error import ApiError
from ..types.public_api_error import PublicApiError


class NotFoundError(ApiError):
    """`ApiError` subclass that always reports status code 404 and carries a `PublicApiError` body."""

    def __init__(
        self,
        body: PublicApiError,
        headers: typing.Optional[typing.Dict[str, str]] = None,
    ) -> None:
        """
        Parameters
        ----------
        body : PublicApiError
            Error payload handed to `ApiError` as `body`.

        headers : typing.Optional[typing.Dict[str, str]]
            Headers handed to `ApiError`; `None` when omitted.
        """
        # The status code is fixed by the class; callers only supply body and headers.
        super().__init__(body=body, headers=headers, status_code=404)


# diff --git a/langfuse/api/unstable/errors/errors/too_many_requests_error.py b/langfuse/api/unstable/errors/errors/too_many_requests_error.py
# new file mode 100644
# index 000000000..2a8345bc7
# --- /dev/null
# +++ b/langfuse/api/unstable/errors/errors/too_many_requests_error.py
# @@ -0,0 +1,15 @@
# This file was auto-generated by Fern from our API Definition.

import typing

from ....core.api_error import ApiError
from ..types.public_api_error import PublicApiError


class TooManyRequestsError(ApiError):
    """`ApiError` subclass that always reports status code 429 and carries a `PublicApiError` body."""

    def __init__(
        self,
        body: PublicApiError,
        headers: typing.Optional[typing.Dict[str, str]] = None,
    ) -> None:
        """
        Parameters
        ----------
        body : PublicApiError
            Error payload handed to `ApiError` as `body`.

        headers : typing.Optional[typing.Dict[str, str]]
            Headers handed to `ApiError`; `None` when omitted.
        """
        # The status code is fixed by the class; callers only supply body and headers.
        super().__init__(body=body, headers=headers, status_code=429)


# diff --git a/langfuse/api/unstable/errors/errors/unauthorized_error.py b/langfuse/api/unstable/errors/errors/unauthorized_error.py
# new file mode 100644
# index 000000000..84d847643
# --- /dev/null
# +++ b/langfuse/api/unstable/errors/errors/unauthorized_error.py
# @@ -0,0 +1,15 @@
# This file was auto-generated by Fern from our API Definition.
import typing

from ....core.api_error import ApiError
from ..types.public_api_error import PublicApiError


class UnauthorizedError(ApiError):
    """`ApiError` subclass that always reports status code 401 and carries a `PublicApiError` body."""

    def __init__(
        self,
        body: PublicApiError,
        headers: typing.Optional[typing.Dict[str, str]] = None,
    ) -> None:
        """
        Parameters
        ----------
        body : PublicApiError
            Error payload handed to `ApiError` as `body`.

        headers : typing.Optional[typing.Dict[str, str]]
            Headers handed to `ApiError`; `None` when omitted.
        """
        # The status code is fixed by the class; callers only supply body and headers.
        super().__init__(body=body, headers=headers, status_code=401)


# diff --git a/langfuse/api/unstable/errors/errors/unprocessable_content_error.py b/langfuse/api/unstable/errors/errors/unprocessable_content_error.py
# new file mode 100644
# index 000000000..a701ef9c5
# --- /dev/null
# +++ b/langfuse/api/unstable/errors/errors/unprocessable_content_error.py
# @@ -0,0 +1,15 @@
# This file was auto-generated by Fern from our API Definition.

import typing

from ....core.api_error import ApiError
from ..types.public_api_error import PublicApiError


class UnprocessableContentError(ApiError):
    """`ApiError` subclass that always reports status code 422 and carries a `PublicApiError` body."""

    def __init__(
        self,
        body: PublicApiError,
        headers: typing.Optional[typing.Dict[str, str]] = None,
    ) -> None:
        """
        Parameters
        ----------
        body : PublicApiError
            Error payload handed to `ApiError` as `body`.

        headers : typing.Optional[typing.Dict[str, str]]
            Headers handed to `ApiError`; `None` when omitted.
        """
        # The status code is fixed by the class; callers only supply body and headers.
        super().__init__(body=body, headers=headers, status_code=422)


# diff --git a/langfuse/api/unstable/errors/types/__init__.py b/langfuse/api/unstable/errors/types/__init__.py
# new file mode 100644
# index 000000000..fd016304e
# --- /dev/null
# +++ b/langfuse/api/unstable/errors/types/__init__.py
# @@ -0,0 +1,53 @@
# This file was auto-generated by Fern from our API Definition.
# isort: skip_file

import typing
from importlib import import_module

if typing.TYPE_CHECKING:
    from .public_api_error import PublicApiError
    from .public_api_error_code import PublicApiErrorCode
    from .public_api_error_details import PublicApiErrorDetails
    from .public_api_validation_issue import PublicApiValidationIssue
# Public name -> relative module that defines it; consulted lazily by __getattr__.
_dynamic_imports: typing.Dict[str, str] = {
    "PublicApiError": ".public_api_error",
    "PublicApiErrorCode": ".public_api_error_code",
    "PublicApiErrorDetails": ".public_api_error_details",
    "PublicApiValidationIssue": ".public_api_validation_issue",
}


def __getattr__(attr_name: str) -> typing.Any:
    """
    Resolve a lazily exported name on first attribute access.

    Parameters
    ----------
    attr_name : str
        Name being looked up on this module.

    Returns
    -------
    typing.Any
        The imported module itself when the mapped module path is `.<attr_name>`,
        otherwise the attribute called `attr_name` read from the imported module.

    Raises
    ------
    AttributeError
        If `attr_name` is not listed in `_dynamic_imports`, or the imported module
        does not provide it.
    ImportError
        If importing the mapped module fails.
    """
    if attr_name not in _dynamic_imports:
        raise AttributeError(
            f"No {attr_name} found in _dynamic_imports for module name -> {__name__}"
        )
    module_name = _dynamic_imports[attr_name]
    try:
        # Relative import, anchored at this package.
        loaded = import_module(module_name, __package__)
        names_a_submodule = module_name == f".{attr_name}"
        return loaded if names_a_submodule else getattr(loaded, attr_name)
    except ImportError as exc:
        raise ImportError(
            f"Failed to import {attr_name} from {module_name}: {exc}"
        ) from exc
    except AttributeError as exc:
        raise AttributeError(
            f"Failed to get {attr_name} from {module_name}: {exc}"
        ) from exc


def __dir__():
    """Return the lazily exported names in sorted order."""
    return sorted(_dynamic_imports)


__all__ = [
    "PublicApiError",
    "PublicApiErrorCode",
    "PublicApiErrorDetails",
    "PublicApiValidationIssue",
]
# diff --git a/langfuse/api/unstable/errors/types/public_api_error.py b/langfuse/api/unstable/errors/types/public_api_error.py
# new file mode 100644
# index 000000000..5d1384e7c
# --- /dev/null
# +++ b/langfuse/api/unstable/errors/types/public_api_error.py
# @@ -0,0 +1,58 @@
# This file was auto-generated by Fern from our API Definition.
import typing

import pydantic
from ....core.pydantic_utilities import UniversalBaseModel
from .public_api_error_code import PublicApiErrorCode
from .public_api_error_details import PublicApiErrorDetails


class PublicApiError(UniversalBaseModel):
    """
    Standard error envelope for the unstable evaluators API.

    Response handling guidance:
    - Use the HTTP status code for the broad class of failure.
    - Use `code` for precise branching in SDKs, CLIs, or agents.
    - Inspect `details` for field-level validation context such as invalid filter values, malformed JSONPath expressions, or missing variable mappings.
    - Retry only after fixing the specific issue described by `code` and `details`.

    Examples
    --------
    from langfuse.api.unstable.errors.types import (
        PublicApiError,
        PublicApiErrorCode,
        PublicApiErrorDetails,
    )

    PublicApiError(
        message='Filter column "type" contains unsupported value(s): INVALID',
        code=PublicApiErrorCode.INVALID_FILTER_VALUE,
        details=PublicApiErrorDetails(
            field="filter[0].value",
            column="type",
            invalid_values=["INVALID"],
            allowed_values=["GENERATION", "SPAN", "EVENT"],
        ),
    )
    """

    # Declared without a default.
    message: str = pydantic.Field()
    """
    Human-readable description of the failure.
    """

    # Declared without a default.
    code: PublicApiErrorCode = pydantic.Field()
    """
    Stable machine-readable error code.
    """

    # Optional; defaults to None when no structured context is supplied.
    details: typing.Optional[PublicApiErrorDetails] = pydantic.Field(default=None)
    """
    Optional structured error context. Inspect the populated fields based on `code`.
    """

    # extra="allow": keys not declared above are kept rather than rejected.
    # frozen=True: instances are immutable after construction.
    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
        extra="allow", frozen=True
    )


# diff --git a/langfuse/api/unstable/errors/types/public_api_error_code.py b/langfuse/api/unstable/errors/types/public_api_error_code.py
# new file mode 100644
# index 000000000..fe8f67f83
# --- /dev/null
# +++ b/langfuse/api/unstable/errors/types/public_api_error_code.py
# @@ -0,0 +1,93 @@
# This file was auto-generated by Fern from our API Definition.
import typing

from ....core import enum

T_Result = typing.TypeVar("T_Result")


class PublicApiErrorCode(enum.StrEnum):
    """
    Machine-readable error codes returned by the unstable evaluators API.

    SDKs, CLIs, and agents should branch on `code` instead of parsing the
    human-readable `message`. The HTTP status still conveys the broad error
    class; `code` pins down the specific failure reason.
    """

    AUTHENTICATION_FAILED = "authentication_failed"
    ACCESS_DENIED = "access_denied"
    INVALID_REQUEST = "invalid_request"
    INVALID_QUERY = "invalid_query"
    INVALID_BODY = "invalid_body"
    INVALID_FILTER_VALUE = "invalid_filter_value"
    INVALID_JSON_PATH = "invalid_json_path"
    INVALID_VARIABLE_MAPPING = "invalid_variable_mapping"
    MISSING_VARIABLE_MAPPING = "missing_variable_mapping"
    DUPLICATE_VARIABLE_MAPPING = "duplicate_variable_mapping"
    RESOURCE_NOT_FOUND = "resource_not_found"
    NAME_CONFLICT = "name_conflict"
    EVALUATOR_PREFLIGHT_FAILED = "evaluator_preflight_failed"
    CONFLICT = "conflict"
    UNPROCESSABLE_CONTENT = "unprocessable_content"
    RATE_LIMITED = "rate_limited"
    METHOD_NOT_ALLOWED = "method_not_allowed"
    INTERNAL_ERROR = "internal_error"

    def visit(
        self,
        authentication_failed: typing.Callable[[], T_Result],
        access_denied: typing.Callable[[], T_Result],
        invalid_request: typing.Callable[[], T_Result],
        invalid_query: typing.Callable[[], T_Result],
        invalid_body: typing.Callable[[], T_Result],
        invalid_filter_value: typing.Callable[[], T_Result],
        invalid_json_path: typing.Callable[[], T_Result],
        invalid_variable_mapping: typing.Callable[[], T_Result],
        missing_variable_mapping: typing.Callable[[], T_Result],
        duplicate_variable_mapping: typing.Callable[[], T_Result],
        resource_not_found: typing.Callable[[], T_Result],
        name_conflict: typing.Callable[[], T_Result],
        evaluator_preflight_failed: typing.Callable[[], T_Result],
        conflict: typing.Callable[[], T_Result],
        unprocessable_content: typing.Callable[[], T_Result],
        rate_limited: typing.Callable[[], T_Result],
        method_not_allowed: typing.Callable[[], T_Result],
        internal_error: typing.Callable[[], T_Result],
    ) -> T_Result:
        """
        Call the zero-argument callback that corresponds to this member.

        One callback is supplied per member, named after the member's value.
        Only the callback whose member is identical to `self` is invoked, and
        its result is returned.
        """
        codes = PublicApiErrorCode
        # Ordered (member, callback) pairs, in member declaration order.
        dispatch = (
            (codes.AUTHENTICATION_FAILED, authentication_failed),
            (codes.ACCESS_DENIED, access_denied),
            (codes.INVALID_REQUEST, invalid_request),
            (codes.INVALID_QUERY, invalid_query),
            (codes.INVALID_BODY, invalid_body),
            (codes.INVALID_FILTER_VALUE, invalid_filter_value),
            (codes.INVALID_JSON_PATH, invalid_json_path),
            (codes.INVALID_VARIABLE_MAPPING, invalid_variable_mapping),
            (codes.MISSING_VARIABLE_MAPPING, missing_variable_mapping),
            (codes.DUPLICATE_VARIABLE_MAPPING, duplicate_variable_mapping),
            (codes.RESOURCE_NOT_FOUND, resource_not_found),
            (codes.NAME_CONFLICT, name_conflict),
            (codes.EVALUATOR_PREFLIGHT_FAILED, evaluator_preflight_failed),
            (codes.CONFLICT, conflict),
            (codes.UNPROCESSABLE_CONTENT, unprocessable_content),
            (codes.RATE_LIMITED, rate_limited),
            (codes.METHOD_NOT_ALLOWED, method_not_allowed),
            (codes.INTERNAL_ERROR, internal_error),
        )
        for member, callback in dispatch:
            # Identity comparison, one member at a time.
            if self is member:
                return callback()
        # Nothing matched: fall off the end, which returns None implicitly.


# diff --git a/langfuse/api/unstable/errors/types/public_api_error_details.py b/langfuse/api/unstable/errors/types/public_api_error_details.py
# new file mode 100644
# index 000000000..803378164
# --- /dev/null
# +++ b/langfuse/api/unstable/errors/types/public_api_error_details.py
# @@ -0,0 +1,114 @@
# This file was auto-generated by Fern from our API
Definition. + +import typing + +import pydantic +import typing_extensions +from ....core.pydantic_utilities import UniversalBaseModel +from ....core.serialization import FieldMetadata +from .public_api_validation_issue import PublicApiValidationIssue + + +class PublicApiErrorDetails(UniversalBaseModel): + """ + Optional structured context attached to an unstable-evals error. + + The populated fields depend on the error `code`: + - request parsing failures populate `issues` + - filter validation failures populate `field`, `column`, `invalidValues`, and `allowedValues` + - variable mapping failures populate `field`, `variable`, or `variables` + - JSONPath validation failures populate `field`, `variable`, and `value` + - evaluator preflight failures populate `evaluatorName`, `provider`, and `model` + - rate limiting populates `retryAfterSeconds`, `limit`, `remaining`, and `resetAt` + """ + + issues: typing.Optional[typing.List[PublicApiValidationIssue]] = pydantic.Field( + default=None + ) + """ + Validation issues for malformed request bodies or query parameters. + """ + + field: typing.Optional[str] = pydantic.Field(default=None) + """ + Path-like reference to the failing field, for example `mapping[1].jsonPath`. + """ + + column: typing.Optional[str] = pydantic.Field(default=None) + """ + Filter column that failed validation. + """ + + invalid_values: typing_extensions.Annotated[ + typing.Optional[typing.List[str]], FieldMetadata(alias="invalidValues") + ] = pydantic.Field(default=None) + """ + Unsupported values supplied by the caller. + """ + + allowed_values: typing_extensions.Annotated[ + typing.Optional[typing.List[str]], FieldMetadata(alias="allowedValues") + ] = pydantic.Field(default=None) + """ + Allowed values for the failing filter column. + """ + + variable: typing.Optional[str] = pydantic.Field(default=None) + """ + Evaluator variable involved in the failure. 
+ """ + + variables: typing.Optional[typing.List[str]] = pydantic.Field(default=None) + """ + Multiple evaluator variables involved in the failure, for example missing mappings. + """ + + value: typing.Optional[str] = pydantic.Field(default=None) + """ + Raw invalid value supplied by the caller. + """ + + evaluator_name: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="evaluatorName") + ] = pydantic.Field(default=None) + """ + Evaluator name used during preflight validation. + """ + + provider: typing.Optional[str] = pydantic.Field(default=None) + """ + Provider resolved during evaluator preflight, if any. + """ + + model: typing.Optional[str] = pydantic.Field(default=None) + """ + Model resolved during evaluator preflight, if any. + """ + + retry_after_seconds: typing_extensions.Annotated[ + typing.Optional[int], FieldMetadata(alias="retryAfterSeconds") + ] = pydantic.Field(default=None) + """ + Suggested retry delay for rate-limited requests. + """ + + limit: typing.Optional[int] = pydantic.Field(default=None) + """ + Numeric limit associated with the failure, for example the active evaluation-rule cap or the current rate-limit window. + """ + + remaining: typing.Optional[int] = pydantic.Field(default=None) + """ + Remaining requests in the current rate-limit window. + """ + + reset_at: typing_extensions.Annotated[ + typing.Optional[str], FieldMetadata(alias="resetAt") + ] = pydantic.Field(default=None) + """ + ISO-8601 timestamp when the current rate-limit window resets. 
+ """ + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/errors/types/public_api_validation_issue.py b/langfuse/api/unstable/errors/types/public_api_validation_issue.py new file mode 100644 index 000000000..877d0376a --- /dev/null +++ b/langfuse/api/unstable/errors/types/public_api_validation_issue.py @@ -0,0 +1,34 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ....core.pydantic_utilities import UniversalBaseModel + + +class PublicApiValidationIssue(UniversalBaseModel): + """ + One validation issue returned for malformed request bodies or query parameters. + + This mirrors the most important parts of a Zod issue: a machine-readable `code`, + a human-readable `message`, and a structured `path`. + """ + + code: str = pydantic.Field() + """ + Machine-readable validation issue code emitted by the server validator. + """ + + message: str = pydantic.Field() + """ + Human-readable explanation of the validation failure. + """ + + path: typing.List[typing.Any] = pydantic.Field() + """ + Path to the invalid field, for example `["mapping", 0, "jsonPath"]`. + """ + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/evaluation_rules/__init__.py b/langfuse/api/unstable/evaluation_rules/__init__.py new file mode 100644 index 000000000..f0c007231 --- /dev/null +++ b/langfuse/api/unstable/evaluation_rules/__init__.py @@ -0,0 +1,64 @@ +# This file was auto-generated by Fern from our API Definition. 
# isort: skip_file

import typing
from importlib import import_module

if typing.TYPE_CHECKING:
    from .types import (
        CreateEvaluationRuleRequest,
        DeleteEvaluationRuleResponse,
        EvaluationRule,
        EvaluationRuleEvaluator,
        EvaluationRuleEvaluatorReference,
        EvaluationRules,
        UpdateEvaluationRuleRequest,
    )
# Public name -> relative module that defines it; consulted lazily by __getattr__.
_dynamic_imports: typing.Dict[str, str] = {
    "CreateEvaluationRuleRequest": ".types",
    "DeleteEvaluationRuleResponse": ".types",
    "EvaluationRule": ".types",
    "EvaluationRuleEvaluator": ".types",
    "EvaluationRuleEvaluatorReference": ".types",
    "EvaluationRules": ".types",
    "UpdateEvaluationRuleRequest": ".types",
}


def __getattr__(attr_name: str) -> typing.Any:
    """
    Resolve a lazily exported name on first attribute access.

    Parameters
    ----------
    attr_name : str
        Name being looked up on this module.

    Returns
    -------
    typing.Any
        The imported module itself when the mapped module path is `.<attr_name>`,
        otherwise the attribute called `attr_name` read from the imported module.

    Raises
    ------
    AttributeError
        If `attr_name` is not listed in `_dynamic_imports`, or the imported module
        does not provide it.
    ImportError
        If importing the mapped module fails.
    """
    if attr_name not in _dynamic_imports:
        raise AttributeError(
            f"No {attr_name} found in _dynamic_imports for module name -> {__name__}"
        )
    module_name = _dynamic_imports[attr_name]
    try:
        # Relative import, anchored at this package.
        loaded = import_module(module_name, __package__)
        names_a_submodule = module_name == f".{attr_name}"
        return loaded if names_a_submodule else getattr(loaded, attr_name)
    except ImportError as exc:
        raise ImportError(
            f"Failed to import {attr_name} from {module_name}: {exc}"
        ) from exc
    except AttributeError as exc:
        raise AttributeError(
            f"Failed to get {attr_name} from {module_name}: {exc}"
        ) from exc


def __dir__():
    """Return the lazily exported names in sorted order."""
    return sorted(_dynamic_imports)


__all__ = [
    "CreateEvaluationRuleRequest",
    "DeleteEvaluationRuleResponse",
    "EvaluationRule",
    "EvaluationRuleEvaluator",
    "EvaluationRuleEvaluatorReference",
    "EvaluationRules",
    "UpdateEvaluationRuleRequest",
]
# diff --git a/langfuse/api/unstable/evaluation_rules/client.py b/langfuse/api/unstable/evaluation_rules/client.py
# new file mode 100644
# index 000000000..20e56e6c3
# --- /dev/null
# +++ b/langfuse/api/unstable/evaluation_rules/client.py
# @@ -0,0 +1,859 @@
# This file was auto-generated by Fern from our API Definition.
+ +import typing + +from ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper +from ...core.request_options import RequestOptions +from ..commons.types.evaluation_rule_filter import EvaluationRuleFilter +from ..commons.types.evaluation_rule_mapping import EvaluationRuleMapping +from ..commons.types.evaluation_rule_target import EvaluationRuleTarget +from .raw_client import AsyncRawEvaluationRulesClient, RawEvaluationRulesClient +from .types.delete_evaluation_rule_response import DeleteEvaluationRuleResponse +from .types.evaluation_rule import EvaluationRule +from .types.evaluation_rule_evaluator_reference import EvaluationRuleEvaluatorReference +from .types.evaluation_rules import EvaluationRules + +# this is used as the default value for optional parameters +OMIT = typing.cast(typing.Any, ...) + + +class EvaluationRulesClient: + def __init__(self, *, client_wrapper: SyncClientWrapper): + self._raw_client = RawEvaluationRulesClient(client_wrapper=client_wrapper) + + @property + def with_raw_response(self) -> RawEvaluationRulesClient: + """ + Retrieves a raw implementation of this client that returns raw responses. + + Returns + ------- + RawEvaluationRulesClient + """ + return self._raw_client + + def create( + self, + *, + name: str, + evaluator: EvaluationRuleEvaluatorReference, + target: EvaluationRuleTarget, + enabled: bool, + mapping: typing.Sequence[EvaluationRuleMapping], + sampling: typing.Optional[float] = OMIT, + filter: typing.Optional[typing.Sequence[EvaluationRuleFilter]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> EvaluationRule: + """ + Create an evaluation rule. + + An evaluation rule defines **what** incoming data should be evaluated and **how prompt variables should be populated** from that data. + + Use this resource after choosing an evaluator from the evaluator endpoints. 
+ + Key rules: + - `name` must be unique within the project for public evaluation rules + - `target` must be `observation` or `experiment` + - `evaluator.name` + `evaluator.scope` must identify an existing evaluator family returned by the evaluator endpoints + - Langfuse resolves that family to its latest version before saving the evaluation rule + - for `target=experiment`, use dataset `id` values from `GET /api/public/v2/datasets` when filtering by `datasetId` + - every evaluator prompt variable must be mapped exactly once + - `expected_output` and `experiment_item_metadata` mappings are only valid for `target=experiment` + - if `enabled=true`, Langfuse validates that the referenced evaluator can currently run + - at most 50 evaluation rules can be effectively active in one project at the same time + + If an evaluation rule with the same `name` already exists in the project, the API returns `409`. + In that case, update the existing resource with `PATCH /api/public/unstable/evaluation-rules/{evaluationRuleId}` instead of creating a second one. + + If enabling this resource would exceed the 50-active limit, the API also returns `409`. + In that case, disable or pause another active evaluation rule before enabling a new one. 
+ + Current scope: + - evaluation rules are live-ingestion rules only + - they do not trigger historical backfills + + Recovery guidance: + - `400 invalid_filter_value`: fix the filter `column` or `value` using `details.column`, `details.invalidValues`, and `details.allowedValues` + - `400 invalid_filter_value` with `details.column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response + - `400 missing_variable_mapping`: fetch the evaluator again and make sure every variable in `variables` appears exactly once in `mapping` + - `400 duplicate_variable_mapping`: remove repeated mappings for the same variable + - `400 invalid_variable_mapping`: switch to a valid `source` for the selected `target`, or fix the variable name + - `400 invalid_json_path`: remove or correct the `jsonPath` + - `422 evaluator_preflight_failed`: the selected evaluator cannot run with the resolved model configuration. Fix the evaluator/default model setup, then retry the create request. + + Parameters + ---------- + name : str + Human-readable deployment name. + + evaluator : EvaluationRuleEvaluatorReference + Evaluator family to use. + + Use `name` and `scope` from the evaluator endpoints. + Langfuse resolves that family to its latest version before saving the rule. + + target : EvaluationRuleTarget + Target object type to evaluate. + + enabled : bool + Whether the deployment should be active immediately after creation. + + mapping : typing.Sequence[EvaluationRuleMapping] + Required variable mappings. + + Every evaluator variable must appear exactly once. + Build this list from the evaluator `variables` array returned by the evaluator endpoints. + + sampling : typing.Optional[float] + Optional sampling fraction. Defaults to `1`. + + filter : typing.Optional[typing.Sequence[EvaluationRuleFilter]] + Optional filter list. + + Omit or pass an empty list to evaluate all matching targets for the selected `target`. 
+ Each filter object must use a column that is valid for that `target`. + For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + EvaluationRule + + Examples + -------- + from langfuse import LangfuseAPI + from langfuse.unstable.commons import ( + EvaluationRuleFilter_StringOptions, + EvaluationRuleMapping, + EvaluationRuleMappingSource, + EvaluationRuleOptionsFilterOperator, + EvaluationRuleTarget, + EvaluatorScope, + ) + from langfuse.unstable.evaluation_rules import EvaluationRuleEvaluatorReference + + client = LangfuseAPI( + x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME", + x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION", + x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY", + username="YOUR_USERNAME", + password="YOUR_PASSWORD", + base_url="https://yourhost.com/path/to/api", + ) + client.unstable.evaluation_rules.create( + name="answer-correctness-live", + evaluator=EvaluationRuleEvaluatorReference( + name="answer-correctness", + scope=EvaluatorScope.PROJECT, + ), + target=EvaluationRuleTarget.OBSERVATION, + enabled=True, + sampling=1.0, + filter=[ + EvaluationRuleFilter_StringOptions( + column="type", + operator=EvaluationRuleOptionsFilterOperator.ANY_OF, + value=["GENERATION"], + ) + ], + mapping=[ + EvaluationRuleMapping( + variable="input", + source=EvaluationRuleMappingSource.INPUT, + ), + EvaluationRuleMapping( + variable="output", + source=EvaluationRuleMappingSource.OUTPUT, + ), + ], + ) + """ + _response = self._raw_client.create( + name=name, + evaluator=evaluator, + target=target, + enabled=enabled, + mapping=mapping, + sampling=sampling, + filter=filter, + request_options=request_options, + ) + return _response.data + + def list( + self, + *, + page: typing.Optional[int] = None, + limit: typing.Optional[int] = None, + request_options: 
typing.Optional[RequestOptions] = None, + ) -> EvaluationRules: + """ + List evaluation rules in the authenticated project. + + Each item describes one live evaluation rule and its effective runtime status. + + Parameters + ---------- + page : typing.Optional[int] + 1-based page number. Defaults to `1`. + + limit : typing.Optional[int] + Maximum number of items per page. Defaults to `50`. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + EvaluationRules + + Examples + -------- + from langfuse import LangfuseAPI + + client = LangfuseAPI( + x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME", + x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION", + x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY", + username="YOUR_USERNAME", + password="YOUR_PASSWORD", + base_url="https://yourhost.com/path/to/api", + ) + client.unstable.evaluation_rules.list() + """ + _response = self._raw_client.list( + page=page, limit=limit, request_options=request_options + ) + return _response.data + + def get( + self, + evaluation_rule_id: str, + *, + request_options: typing.Optional[RequestOptions] = None, + ) -> EvaluationRule: + """ + Get one evaluation rule by its identifier. + + Use this endpoint to inspect the current evaluator, target, mapping, filters, and effective runtime status. + + Parameters + ---------- + evaluation_rule_id : str + Evaluation rule identifier returned by the evaluation rule endpoints. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + EvaluationRule + + Examples + -------- + from langfuse import LangfuseAPI + + client = LangfuseAPI( + x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME", + x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION", + x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY", + username="YOUR_USERNAME", + password="YOUR_PASSWORD", + base_url="https://yourhost.com/path/to/api", + ) + client.unstable.evaluation_rules.get( + evaluation_rule_id="evaluationRuleId", + ) + """ + _response = self._raw_client.get( + evaluation_rule_id, request_options=request_options + ) + return _response.data + + def update( + self, + evaluation_rule_id: str, + *, + name: typing.Optional[str] = OMIT, + evaluator: typing.Optional[EvaluationRuleEvaluatorReference] = OMIT, + target: typing.Optional[EvaluationRuleTarget] = OMIT, + enabled: typing.Optional[bool] = OMIT, + sampling: typing.Optional[float] = OMIT, + filter: typing.Optional[typing.Sequence[EvaluationRuleFilter]] = OMIT, + mapping: typing.Optional[typing.Sequence[EvaluationRuleMapping]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> EvaluationRule: + """ + Update an evaluation rule. 
+ + Typical uses: + - enable or disable live execution + - switch to another evaluator + - adjust sampling + - change filters + - update variable mappings + + Important behavior: + - provide only the fields you want to change + - if you provide `evaluator`, Langfuse resolves that evaluator family to its latest version before saving + - changing `target`, `filter`, or `mapping` must still produce a valid target-specific configuration + - if you change `target`, also send a compatible `filter` and `mapping` in the same request unless the existing ones are still valid for the new target + - if the resulting config is enabled, Langfuse re-validates that the selected evaluator can run + - if the update would move a non-active evaluation rule into the active state and the project already has 50 active evaluation rules, the API returns `409` + + Recovery guidance: + - if the update fails with `missing_variable_mapping` or `invalid_variable_mapping` after changing `evaluator` or `target`, resend the request with a complete new `mapping` + - if the update fails with `invalid_filter_value` after changing `target`, resend the request with a target-compatible `filter` + + Parameters + ---------- + evaluation_rule_id : str + Evaluation rule identifier. + + name : typing.Optional[str] + Updated deployment name. + + evaluator : typing.Optional[EvaluationRuleEvaluatorReference] + Updated evaluator family. + + Langfuse resolves the provided evaluator family to its latest version before saving the rule. + + target : typing.Optional[EvaluationRuleTarget] + Updated target object type. + + enabled : typing.Optional[bool] + Updated desired enabled state. + + sampling : typing.Optional[float] + Updated sampling fraction. + + filter : typing.Optional[typing.Sequence[EvaluationRuleFilter]] + Updated filter list. + + For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names. 
+ + mapping : typing.Optional[typing.Sequence[EvaluationRuleMapping]] + Updated variable mappings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + EvaluationRule + + Examples + -------- + from langfuse import LangfuseAPI + + client = LangfuseAPI( + x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME", + x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION", + x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY", + username="YOUR_USERNAME", + password="YOUR_PASSWORD", + base_url="https://yourhost.com/path/to/api", + ) + client.unstable.evaluation_rules.update( + evaluation_rule_id="evaluationRuleId", + ) + """ + _response = self._raw_client.update( + evaluation_rule_id, + name=name, + evaluator=evaluator, + target=target, + enabled=enabled, + sampling=sampling, + filter=filter, + mapping=mapping, + request_options=request_options, + ) + return _response.data + + def delete( + self, + evaluation_rule_id: str, + *, + request_options: typing.Optional[RequestOptions] = None, + ) -> DeleteEvaluationRuleResponse: + """ + Delete an evaluation rule. + + This removes the live-ingestion rule only. It does not delete the referenced evaluator. + + Parameters + ---------- + evaluation_rule_id : str + Evaluation rule identifier. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + DeleteEvaluationRuleResponse + + Examples + -------- + from langfuse import LangfuseAPI + + client = LangfuseAPI( + x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME", + x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION", + x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY", + username="YOUR_USERNAME", + password="YOUR_PASSWORD", + base_url="https://yourhost.com/path/to/api", + ) + client.unstable.evaluation_rules.delete( + evaluation_rule_id="evaluationRuleId", + ) + """ + _response = self._raw_client.delete( + evaluation_rule_id, request_options=request_options + ) + return _response.data + + +class AsyncEvaluationRulesClient: + def __init__(self, *, client_wrapper: AsyncClientWrapper): + self._raw_client = AsyncRawEvaluationRulesClient(client_wrapper=client_wrapper) + + @property + def with_raw_response(self) -> AsyncRawEvaluationRulesClient: + """ + Retrieves a raw implementation of this client that returns raw responses. + + Returns + ------- + AsyncRawEvaluationRulesClient + """ + return self._raw_client + + async def create( + self, + *, + name: str, + evaluator: EvaluationRuleEvaluatorReference, + target: EvaluationRuleTarget, + enabled: bool, + mapping: typing.Sequence[EvaluationRuleMapping], + sampling: typing.Optional[float] = OMIT, + filter: typing.Optional[typing.Sequence[EvaluationRuleFilter]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> EvaluationRule: + """ + Create an evaluation rule. + + An evaluation rule defines **what** incoming data should be evaluated and **how prompt variables should be populated** from that data. + + Use this resource after choosing an evaluator from the evaluator endpoints. 
+ + Key rules: + - `name` must be unique within the project for public evaluation rules + - `target` must be `observation` or `experiment` + - `evaluator.name` + `evaluator.scope` must identify an existing evaluator family returned by the evaluator endpoints + - Langfuse resolves that family to its latest version before saving the evaluation rule + - for `target=experiment`, use dataset `id` values from `GET /api/public/v2/datasets` when filtering by `datasetId` + - every evaluator prompt variable must be mapped exactly once + - `expected_output` and `experiment_item_metadata` mappings are only valid for `target=experiment` + - if `enabled=true`, Langfuse validates that the referenced evaluator can currently run + - at most 50 evaluation rules can be effectively active in one project at the same time + + If an evaluation rule with the same `name` already exists in the project, the API returns `409`. + In that case, update the existing resource with `PATCH /api/public/unstable/evaluation-rules/{evaluationRuleId}` instead of creating a second one. + + If enabling this resource would exceed the 50-active limit, the API also returns `409`. + In that case, disable or pause another active evaluation rule before enabling a new one. 
+ + Current scope: + - evaluation rules are live-ingestion rules only + - they do not trigger historical backfills + + Recovery guidance: + - `400 invalid_filter_value`: fix the filter `column` or `value` using `details.column`, `details.invalidValues`, and `details.allowedValues` + - `400 invalid_filter_value` with `details.column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response + - `400 missing_variable_mapping`: fetch the evaluator again and make sure every variable in `variables` appears exactly once in `mapping` + - `400 duplicate_variable_mapping`: remove repeated mappings for the same variable + - `400 invalid_variable_mapping`: switch to a valid `source` for the selected `target`, or fix the variable name + - `400 invalid_json_path`: remove or correct the `jsonPath` + - `422 evaluator_preflight_failed`: the selected evaluator cannot run with the resolved model configuration. Fix the evaluator/default model setup, then retry the create request. + + Parameters + ---------- + name : str + Human-readable evaluation rule name. + + evaluator : EvaluationRuleEvaluatorReference + Evaluator family to use. + + Use `name` and `scope` from the evaluator endpoints. + Langfuse resolves that family to its latest version before saving the rule. + + target : EvaluationRuleTarget + Target object type to evaluate. + + enabled : bool + Whether the evaluation rule should be active immediately after creation. + + mapping : typing.Sequence[EvaluationRuleMapping] + Required variable mappings. + + Every evaluator variable must appear exactly once. + Build this list from the evaluator `variables` array returned by the evaluator endpoints. + + sampling : typing.Optional[float] + Optional sampling fraction. Defaults to `1`. + + filter : typing.Optional[typing.Sequence[EvaluationRuleFilter]] + Optional filter list. + + Omit or pass an empty list to evaluate all matching targets for the selected `target`.
+ Each filter object must use a column that is valid for that `target`. + For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + EvaluationRule + + Examples + -------- + import asyncio + + from langfuse import AsyncLangfuseAPI + from langfuse.api.unstable.commons import ( + EvaluationRuleFilter_StringOptions, + EvaluationRuleMapping, + EvaluationRuleMappingSource, + EvaluationRuleOptionsFilterOperator, + EvaluationRuleTarget, + EvaluatorScope, + ) + from langfuse.api.unstable.evaluation_rules import EvaluationRuleEvaluatorReference + + client = AsyncLangfuseAPI( + x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME", + x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION", + x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY", + username="YOUR_USERNAME", + password="YOUR_PASSWORD", + base_url="https://yourhost.com/path/to/api", + ) + + + async def main() -> None: + await client.unstable.evaluation_rules.create( + name="answer-correctness-live", + evaluator=EvaluationRuleEvaluatorReference( + name="answer-correctness", + scope=EvaluatorScope.PROJECT, + ), + target=EvaluationRuleTarget.OBSERVATION, + enabled=True, + sampling=1.0, + filter=[ + EvaluationRuleFilter_StringOptions( + column="type", + operator=EvaluationRuleOptionsFilterOperator.ANY_OF, + value=["GENERATION"], + ) + ], + mapping=[ + EvaluationRuleMapping( + variable="input", + source=EvaluationRuleMappingSource.INPUT, + ), + EvaluationRuleMapping( + variable="output", + source=EvaluationRuleMappingSource.OUTPUT, + ), + ], + ) + + + asyncio.run(main()) + """ + _response = await self._raw_client.create( + name=name, + evaluator=evaluator, + target=target, + enabled=enabled, + mapping=mapping, + sampling=sampling, + filter=filter, + request_options=request_options, + ) + return _response.data + + async def list( + self, + *, + page:
typing.Optional[int] = None, + limit: typing.Optional[int] = None, + request_options: typing.Optional[RequestOptions] = None, + ) -> EvaluationRules: + """ + List evaluation rules in the authenticated project. + + Each item describes one live evaluation rule and its effective runtime status. + + Parameters + ---------- + page : typing.Optional[int] + 1-based page number. Defaults to `1`. + + limit : typing.Optional[int] + Maximum number of items per page. Defaults to `50`. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + EvaluationRules + + Examples + -------- + import asyncio + + from langfuse import AsyncLangfuseAPI + + client = AsyncLangfuseAPI( + x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME", + x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION", + x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY", + username="YOUR_USERNAME", + password="YOUR_PASSWORD", + base_url="https://yourhost.com/path/to/api", + ) + + + async def main() -> None: + await client.unstable.evaluation_rules.list() + + + asyncio.run(main()) + """ + _response = await self._raw_client.list( + page=page, limit=limit, request_options=request_options + ) + return _response.data + + async def get( + self, + evaluation_rule_id: str, + *, + request_options: typing.Optional[RequestOptions] = None, + ) -> EvaluationRule: + """ + Get one evaluation rule by its identifier. + + Use this endpoint to inspect the current evaluator, target, mapping, filters, and effective runtime status. + + Parameters + ---------- + evaluation_rule_id : str + Evaluation rule identifier returned by the evaluation rule endpoints. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + EvaluationRule + + Examples + -------- + import asyncio + + from langfuse import AsyncLangfuseAPI + + client = AsyncLangfuseAPI( + x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME", + x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION", + x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY", + username="YOUR_USERNAME", + password="YOUR_PASSWORD", + base_url="https://yourhost.com/path/to/api", + ) + + + async def main() -> None: + await client.unstable.evaluation_rules.get( + evaluation_rule_id="evaluationRuleId", + ) + + + asyncio.run(main()) + """ + _response = await self._raw_client.get( + evaluation_rule_id, request_options=request_options + ) + return _response.data + + async def update( + self, + evaluation_rule_id: str, + *, + name: typing.Optional[str] = OMIT, + evaluator: typing.Optional[EvaluationRuleEvaluatorReference] = OMIT, + target: typing.Optional[EvaluationRuleTarget] = OMIT, + enabled: typing.Optional[bool] = OMIT, + sampling: typing.Optional[float] = OMIT, + filter: typing.Optional[typing.Sequence[EvaluationRuleFilter]] = OMIT, + mapping: typing.Optional[typing.Sequence[EvaluationRuleMapping]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> EvaluationRule: + """ + Update an evaluation rule. 
+ + Typical uses: + - enable or disable live execution + - switch to another evaluator + - adjust sampling + - change filters + - update variable mappings + + Important behavior: + - provide only the fields you want to change + - if you provide `evaluator`, Langfuse resolves that evaluator family to its latest version before saving + - changing `target`, `filter`, or `mapping` must still produce a valid target-specific configuration + - if you change `target`, also send a compatible `filter` and `mapping` in the same request unless the existing ones are still valid for the new target + - if the resulting config is enabled, Langfuse re-validates that the selected evaluator can run + - if the update would move a non-active evaluation rule into the active state and the project already has 50 active evaluation rules, the API returns `409` + + Recovery guidance: + - if the update fails with `missing_variable_mapping` or `invalid_variable_mapping` after changing `evaluator` or `target`, resend the request with a complete new `mapping` + - if the update fails with `invalid_filter_value` after changing `target`, resend the request with a target-compatible `filter` + + Parameters + ---------- + evaluation_rule_id : str + Evaluation rule identifier. + + name : typing.Optional[str] + Updated evaluation rule name. + + evaluator : typing.Optional[EvaluationRuleEvaluatorReference] + Updated evaluator family. + + Langfuse resolves the provided evaluator family to its latest version before saving the rule. + + target : typing.Optional[EvaluationRuleTarget] + Updated target object type. + + enabled : typing.Optional[bool] + Updated desired enabled state. + + sampling : typing.Optional[float] + Updated sampling fraction. + + filter : typing.Optional[typing.Sequence[EvaluationRuleFilter]] + Updated filter list. + + For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
+ + mapping : typing.Optional[typing.Sequence[EvaluationRuleMapping]] + Updated variable mappings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + EvaluationRule + + Examples + -------- + import asyncio + + from langfuse import AsyncLangfuseAPI + + client = AsyncLangfuseAPI( + x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME", + x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION", + x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY", + username="YOUR_USERNAME", + password="YOUR_PASSWORD", + base_url="https://yourhost.com/path/to/api", + ) + + + async def main() -> None: + await client.unstable.evaluation_rules.update( + evaluation_rule_id="evaluationRuleId", + ) + + + asyncio.run(main()) + """ + _response = await self._raw_client.update( + evaluation_rule_id, + name=name, + evaluator=evaluator, + target=target, + enabled=enabled, + sampling=sampling, + filter=filter, + mapping=mapping, + request_options=request_options, + ) + return _response.data + + async def delete( + self, + evaluation_rule_id: str, + *, + request_options: typing.Optional[RequestOptions] = None, + ) -> DeleteEvaluationRuleResponse: + """ + Delete an evaluation rule. + + This removes the live-ingestion rule only. It does not delete the referenced evaluator. + + Parameters + ---------- + evaluation_rule_id : str + Evaluation rule identifier. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + DeleteEvaluationRuleResponse + + Examples + -------- + import asyncio + + from langfuse import AsyncLangfuseAPI + + client = AsyncLangfuseAPI( + x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME", + x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION", + x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY", + username="YOUR_USERNAME", + password="YOUR_PASSWORD", + base_url="https://yourhost.com/path/to/api", + ) + + + async def main() -> None: + await client.unstable.evaluation_rules.delete( + evaluation_rule_id="evaluationRuleId", + ) + + + asyncio.run(main()) + """ + _response = await self._raw_client.delete( + evaluation_rule_id, request_options=request_options + ) + return _response.data diff --git a/langfuse/api/unstable/evaluation_rules/raw_client.py b/langfuse/api/unstable/evaluation_rules/raw_client.py new file mode 100644 index 000000000..f99aba663 --- /dev/null +++ b/langfuse/api/unstable/evaluation_rules/raw_client.py @@ -0,0 +1,2271 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing +from json.decoder import JSONDecodeError + +from ...commons.errors.access_denied_error import ( + AccessDeniedError as commons_errors_access_denied_error_AccessDeniedError, +) +from ...commons.errors.error import Error +from ...commons.errors.method_not_allowed_error import ( + MethodNotAllowedError as commons_errors_method_not_allowed_error_MethodNotAllowedError, +) +from ...commons.errors.not_found_error import ( + NotFoundError as commons_errors_not_found_error_NotFoundError, +) +from ...commons.errors.unauthorized_error import ( + UnauthorizedError as commons_errors_unauthorized_error_UnauthorizedError, +) +from ...core.api_error import ApiError +from ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper +from ...core.http_response import AsyncHttpResponse, HttpResponse +from ...core.jsonable_encoder import jsonable_encoder +from ...core.pydantic_utilities import parse_obj_as +from ...core.request_options import RequestOptions +from ...core.serialization import convert_and_respect_annotation_metadata +from ..commons.types.evaluation_rule_filter import EvaluationRuleFilter +from ..commons.types.evaluation_rule_mapping import EvaluationRuleMapping +from ..commons.types.evaluation_rule_target import EvaluationRuleTarget +from ..errors.errors.access_denied_error import ( + AccessDeniedError as unstable_errors_errors_access_denied_error_AccessDeniedError, +) +from ..errors.errors.bad_request_error import BadRequestError +from ..errors.errors.conflict_error import ConflictError +from ..errors.errors.internal_server_error import InternalServerError +from ..errors.errors.method_not_allowed_error import ( + MethodNotAllowedError as unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError, +) +from ..errors.errors.not_found_error import ( + NotFoundError as unstable_errors_errors_not_found_error_NotFoundError, +) +from ..errors.errors.too_many_requests_error import TooManyRequestsError +from ..errors.errors.unauthorized_error 
import ( + UnauthorizedError as unstable_errors_errors_unauthorized_error_UnauthorizedError, +) +from ..errors.errors.unprocessable_content_error import UnprocessableContentError +from ..errors.types.public_api_error import PublicApiError +from .types.delete_evaluation_rule_response import DeleteEvaluationRuleResponse +from .types.evaluation_rule import EvaluationRule +from .types.evaluation_rule_evaluator_reference import EvaluationRuleEvaluatorReference +from .types.evaluation_rules import EvaluationRules + +# this is used as the default value for optional parameters +OMIT = typing.cast(typing.Any, ...) + + +class RawEvaluationRulesClient: + def __init__(self, *, client_wrapper: SyncClientWrapper): + self._client_wrapper = client_wrapper + + def create( + self, + *, + name: str, + evaluator: EvaluationRuleEvaluatorReference, + target: EvaluationRuleTarget, + enabled: bool, + mapping: typing.Sequence[EvaluationRuleMapping], + sampling: typing.Optional[float] = OMIT, + filter: typing.Optional[typing.Sequence[EvaluationRuleFilter]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> HttpResponse[EvaluationRule]: + """ + Create an evaluation rule. + + An evaluation rule defines **what** incoming data should be evaluated and **how prompt variables should be populated** from that data. + + Use this resource after choosing an evaluator from the evaluator endpoints. 
+ + Key rules: + - `name` must be unique within the project for public evaluation rules + - `target` must be `observation` or `experiment` + - `evaluator.name` + `evaluator.scope` must identify an existing evaluator family returned by the evaluator endpoints + - Langfuse resolves that family to its latest version before saving the evaluation rule + - for `target=experiment`, use dataset `id` values from `GET /api/public/v2/datasets` when filtering by `datasetId` + - every evaluator prompt variable must be mapped exactly once + - `expected_output` and `experiment_item_metadata` mappings are only valid for `target=experiment` + - if `enabled=true`, Langfuse validates that the referenced evaluator can currently run + - at most 50 evaluation rules can be effectively active in one project at the same time + + If an evaluation rule with the same `name` already exists in the project, the API returns `409`. + In that case, update the existing resource with `PATCH /api/public/unstable/evaluation-rules/{evaluationRuleId}` instead of creating a second one. + + If enabling this resource would exceed the 50-active limit, the API also returns `409`. + In that case, disable or pause another active evaluation rule before enabling a new one. 
+ + Current scope: + - evaluation rules are live-ingestion rules only + - they do not trigger historical backfills + + Recovery guidance: + - `400 invalid_filter_value`: fix the filter `column` or `value` using `details.column`, `details.invalidValues`, and `details.allowedValues` + - `400 invalid_filter_value` with `details.column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response + - `400 missing_variable_mapping`: fetch the evaluator again and make sure every variable in `variables` appears exactly once in `mapping` + - `400 duplicate_variable_mapping`: remove repeated mappings for the same variable + - `400 invalid_variable_mapping`: switch to a valid `source` for the selected `target`, or fix the variable name + - `400 invalid_json_path`: remove or correct the `jsonPath` + - `422 evaluator_preflight_failed`: the selected evaluator cannot run with the resolved model configuration. Fix the evaluator/default model setup, then retry the create request. + + Parameters + ---------- + name : str + Human-readable evaluation rule name. + + evaluator : EvaluationRuleEvaluatorReference + Evaluator family to use. + + Use `name` and `scope` from the evaluator endpoints. + Langfuse resolves that family to its latest version before saving the rule. + + target : EvaluationRuleTarget + Target object type to evaluate. + + enabled : bool + Whether the evaluation rule should be active immediately after creation. + + mapping : typing.Sequence[EvaluationRuleMapping] + Required variable mappings. + + Every evaluator variable must appear exactly once. + Build this list from the evaluator `variables` array returned by the evaluator endpoints. + + sampling : typing.Optional[float] + Optional sampling fraction. Defaults to `1`. + + filter : typing.Optional[typing.Sequence[EvaluationRuleFilter]] + Optional filter list. + + Omit or pass an empty list to evaluate all matching targets for the selected `target`.
+ Each filter object must use a column that is valid for that `target`. + For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + HttpResponse[EvaluationRule] + """ + _response = self._client_wrapper.httpx_client.request( + "api/public/unstable/evaluation-rules", + method="POST", + json={ + "name": name, + "evaluator": convert_and_respect_annotation_metadata( + object_=evaluator, + annotation=EvaluationRuleEvaluatorReference, + direction="write", + ), + "target": target, + "enabled": enabled, + "sampling": sampling, + "filter": convert_and_respect_annotation_metadata( + object_=filter, + annotation=typing.Sequence[EvaluationRuleFilter], + direction="write", + ), + "mapping": convert_and_respect_annotation_metadata( + object_=mapping, + annotation=typing.Sequence[EvaluationRuleMapping], + direction="write", + ), + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + EvaluationRule, + parse_obj_as( + type_=EvaluationRule, # type: ignore + object_=_response.json(), + ), + ) + return HttpResponse(response=_response, data=_data) + if _response.status_code == 400: + raise BadRequestError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise unstable_errors_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise unstable_errors_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + 
type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise unstable_errors_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 409: + raise ConflictError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 422: + raise UnprocessableContentError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 429: + raise TooManyRequestsError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 500: + raise InternalServerError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 400: + raise Error( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise commons_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + 
object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise commons_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise commons_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise commons_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response.text, + ) + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response_json, + ) + + def list( + self, + *, + page: typing.Optional[int] = None, + limit: typing.Optional[int] = None, + request_options: typing.Optional[RequestOptions] = None, + ) -> HttpResponse[EvaluationRules]: + """ + List evaluation rules in the authenticated project. + + Each item describes one live evaluation rule and its effective runtime status. + + Parameters + ---------- + page : typing.Optional[int] + 1-based page number. Defaults to `1`. + + limit : typing.Optional[int] + Maximum number of items per page. Defaults to `50`. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + HttpResponse[EvaluationRules] + """ + _response = self._client_wrapper.httpx_client.request( + "api/public/unstable/evaluation-rules", + method="GET", + params={ + "page": page, + "limit": limit, + }, + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + EvaluationRules, + parse_obj_as( + type_=EvaluationRules, # type: ignore + object_=_response.json(), + ), + ) + return HttpResponse(response=_response, data=_data) + if _response.status_code == 400: + raise BadRequestError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise unstable_errors_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise unstable_errors_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 429: + raise TooManyRequestsError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 500: + raise InternalServerError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + 
object_=_response.json(), + ), + ), + ) + if _response.status_code == 400: + raise Error( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise commons_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise commons_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise commons_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise commons_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response.text, + ) + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response_json, + ) + + def get( + self, + evaluation_rule_id: str, + *, + request_options: typing.Optional[RequestOptions] = None, + ) -> HttpResponse[EvaluationRule]: + """ + Get one evaluation rule by its identifier. + + Use this endpoint to inspect the current evaluator, target, mapping, filters, and effective runtime status. 
+ + Parameters + ---------- + evaluation_rule_id : str + Evaluation rule identifier returned by the evaluation rule endpoints. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + HttpResponse[EvaluationRule] + """ + _response = self._client_wrapper.httpx_client.request( + f"api/public/unstable/evaluation-rules/{jsonable_encoder(evaluation_rule_id)}", + method="GET", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + EvaluationRule, + parse_obj_as( + type_=EvaluationRule, # type: ignore + object_=_response.json(), + ), + ) + return HttpResponse(response=_response, data=_data) + if _response.status_code == 400: + raise BadRequestError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise unstable_errors_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise unstable_errors_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise unstable_errors_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + 
object_=_response.json(), + ), + ), + ) + if _response.status_code == 429: + raise TooManyRequestsError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 500: + raise InternalServerError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 400: + raise Error( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise commons_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise commons_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise commons_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise commons_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response.text, + ) + raise ApiError( + status_code=_response.status_code, + 
headers=dict(_response.headers), + body=_response_json, + ) + + def update( + self, + evaluation_rule_id: str, + *, + name: typing.Optional[str] = OMIT, + evaluator: typing.Optional[EvaluationRuleEvaluatorReference] = OMIT, + target: typing.Optional[EvaluationRuleTarget] = OMIT, + enabled: typing.Optional[bool] = OMIT, + sampling: typing.Optional[float] = OMIT, + filter: typing.Optional[typing.Sequence[EvaluationRuleFilter]] = OMIT, + mapping: typing.Optional[typing.Sequence[EvaluationRuleMapping]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> HttpResponse[EvaluationRule]: + """ + Update an evaluation rule. + + Typical uses: + - enable or disable live execution + - switch to another evaluator + - adjust sampling + - change filters + - update variable mappings + + Important behavior: + - provide only the fields you want to change + - if you provide `evaluator`, Langfuse resolves that evaluator family to its latest version before saving + - changing `target`, `filter`, or `mapping` must still produce a valid target-specific configuration + - if you change `target`, also send a compatible `filter` and `mapping` in the same request unless the existing ones are still valid for the new target + - if the resulting config is enabled, Langfuse re-validates that the selected evaluator can run + - if the update would move a non-active evaluation rule into the active state and the project already has 50 active evaluation rules, the API returns `409` + + Recovery guidance: + - if the update fails with `missing_variable_mapping` or `invalid_variable_mapping` after changing `evaluator` or `target`, resend the request with a complete new `mapping` + - if the update fails with `invalid_filter_value` after changing `target`, resend the request with a target-compatible `filter` + + Parameters + ---------- + evaluation_rule_id : str + Evaluation rule identifier. + + name : typing.Optional[str] + Updated deployment name. 
+ + evaluator : typing.Optional[EvaluationRuleEvaluatorReference] + Updated evaluator family. + + Langfuse resolves the provided evaluator family to its latest version before saving the rule. + + target : typing.Optional[EvaluationRuleTarget] + Updated target object type. + + enabled : typing.Optional[bool] + Updated desired enabled state. + + sampling : typing.Optional[float] + Updated sampling fraction. + + filter : typing.Optional[typing.Sequence[EvaluationRuleFilter]] + Updated filter list. + + For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names. + + mapping : typing.Optional[typing.Sequence[EvaluationRuleMapping]] + Updated variable mappings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + HttpResponse[EvaluationRule] + """ + _response = self._client_wrapper.httpx_client.request( + f"api/public/unstable/evaluation-rules/{jsonable_encoder(evaluation_rule_id)}", + method="PATCH", + json={ + "name": name, + "evaluator": convert_and_respect_annotation_metadata( + object_=evaluator, + annotation=EvaluationRuleEvaluatorReference, + direction="write", + ), + "target": target, + "enabled": enabled, + "sampling": sampling, + "filter": convert_and_respect_annotation_metadata( + object_=filter, + annotation=typing.Sequence[EvaluationRuleFilter], + direction="write", + ), + "mapping": convert_and_respect_annotation_metadata( + object_=mapping, + annotation=typing.Sequence[EvaluationRuleMapping], + direction="write", + ), + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + EvaluationRule, + parse_obj_as( + type_=EvaluationRule, # type: ignore + object_=_response.json(), + ), + ) + return HttpResponse(response=_response, data=_data) + if _response.status_code == 400: + raise BadRequestError( + headers=dict(_response.headers), + body=typing.cast( + 
PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise unstable_errors_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise unstable_errors_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise unstable_errors_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 422: + raise UnprocessableContentError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 429: + raise TooManyRequestsError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 500: + raise InternalServerError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 400: + raise Error( + 
headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise commons_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise commons_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise commons_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise commons_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response.text, + ) + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response_json, + ) + + def delete( + self, + evaluation_rule_id: str, + *, + request_options: typing.Optional[RequestOptions] = None, + ) -> HttpResponse[DeleteEvaluationRuleResponse]: + """ + Delete an evaluation rule. + + This removes the live-ingestion rule only. It does not delete the referenced evaluator. + + Parameters + ---------- + evaluation_rule_id : str + Evaluation rule identifier. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + HttpResponse[DeleteEvaluationRuleResponse] + """ + _response = self._client_wrapper.httpx_client.request( + f"api/public/unstable/evaluation-rules/{jsonable_encoder(evaluation_rule_id)}", + method="DELETE", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + DeleteEvaluationRuleResponse, + parse_obj_as( + type_=DeleteEvaluationRuleResponse, # type: ignore + object_=_response.json(), + ), + ) + return HttpResponse(response=_response, data=_data) + if _response.status_code == 400: + raise BadRequestError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise unstable_errors_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise unstable_errors_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise unstable_errors_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 429: + raise TooManyRequestsError( + headers=dict(_response.headers), + body=typing.cast( + 
PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 500: + raise InternalServerError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 400: + raise Error( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise commons_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise commons_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise commons_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise commons_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response.text, + ) + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response_json, + ) + + +class AsyncRawEvaluationRulesClient: + def __init__(self, *, client_wrapper: AsyncClientWrapper): + 
self._client_wrapper = client_wrapper + + async def create( + self, + *, + name: str, + evaluator: EvaluationRuleEvaluatorReference, + target: EvaluationRuleTarget, + enabled: bool, + mapping: typing.Sequence[EvaluationRuleMapping], + sampling: typing.Optional[float] = OMIT, + filter: typing.Optional[typing.Sequence[EvaluationRuleFilter]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncHttpResponse[EvaluationRule]: + """ + Create an evaluation rule. + + An evaluation rule defines **what** incoming data should be evaluated and **how prompt variables should be populated** from that data. + + Use this resource after choosing an evaluator from the evaluator endpoints. + + Key rules: + - `name` must be unique within the project for public evaluation rules + - `target` must be `observation` or `experiment` + - `evaluator.name` + `evaluator.scope` must identify an existing evaluator family returned by the evaluator endpoints + - Langfuse resolves that family to its latest version before saving the evaluation rule + - for `target=experiment`, use dataset `id` values from `GET /api/public/v2/datasets` when filtering by `datasetId` + - every evaluator prompt variable must be mapped exactly once + - `expected_output` and `experiment_item_metadata` mappings are only valid for `target=experiment` + - if `enabled=true`, Langfuse validates that the referenced evaluator can currently run + - at most 50 evaluation rules can be effectively active in one project at the same time + + If an evaluation rule with the same `name` already exists in the project, the API returns `409`. + In that case, update the existing resource with `PATCH /api/public/unstable/evaluation-rules/{evaluationRuleId}` instead of creating a second one. + + If enabling this resource would exceed the 50-active limit, the API also returns `409`. + In that case, disable or pause another active evaluation rule before enabling a new one. 
+ + Current scope: + - evaluation rules are live-ingestion rules only + - they do not trigger historical backfills + + Recovery guidance: + - `400 invalid_filter_value`: fix the filter `column` or `value` using `details.column`, `details.invalidValues`, and `details.allowedValues` + - `400 invalid_filter_value` with `details.column=datasetId`: call `GET /api/public/v2/datasets`, then retry with dataset `id` values from that response + - `400 missing_variable_mapping`: fetch the evaluator again and make sure every variable in `variables` appears exactly once in `mapping` + - `400 duplicate_variable_mapping`: remove repeated mappings for the same variable + - `400 invalid_variable_mapping`: switch to a valid `source` for the selected `target`, or fix the variable name + - `400 invalid_json_path`: remove or correct the `jsonPath` + - `422 evaluator_preflight_failed`: the selected evaluator cannot run with the resolved model configuration. Fix the evaluator/default model setup, then retry the create request. + + Parameters + ---------- + name : str + Human-readable deployment name. + + evaluator : EvaluationRuleEvaluatorReference + Evaluator family to use. + + Use `name` and `scope` from the evaluator endpoints. + Langfuse resolves that family to its latest version before saving the rule. + + target : EvaluationRuleTarget + Target object type to evaluate. + + enabled : bool + Whether the deployment should be active immediately after creation. + + mapping : typing.Sequence[EvaluationRuleMapping] + Required variable mappings. + + Every evaluator variable must appear exactly once. + Build this list from the evaluator `variables` array returned by the evaluator endpoints. + + sampling : typing.Optional[float] + Optional sampling fraction. Defaults to `1`. + + filter : typing.Optional[typing.Sequence[EvaluationRuleFilter]] + Optional filter list. + + Omit or pass an empty list to evaluate all matching targets for the selected `target`. 
+ Each filter object must use a column that is valid for that `target`. + For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + AsyncHttpResponse[EvaluationRule] + """ + _response = await self._client_wrapper.httpx_client.request( + "api/public/unstable/evaluation-rules", + method="POST", + json={ + "name": name, + "evaluator": convert_and_respect_annotation_metadata( + object_=evaluator, + annotation=EvaluationRuleEvaluatorReference, + direction="write", + ), + "target": target, + "enabled": enabled, + "sampling": sampling, + "filter": convert_and_respect_annotation_metadata( + object_=filter, + annotation=typing.Sequence[EvaluationRuleFilter], + direction="write", + ), + "mapping": convert_and_respect_annotation_metadata( + object_=mapping, + annotation=typing.Sequence[EvaluationRuleMapping], + direction="write", + ), + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + EvaluationRule, + parse_obj_as( + type_=EvaluationRule, # type: ignore + object_=_response.json(), + ), + ) + return AsyncHttpResponse(response=_response, data=_data) + if _response.status_code == 400: + raise BadRequestError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise unstable_errors_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise unstable_errors_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + 
parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise unstable_errors_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 409: + raise ConflictError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 422: + raise UnprocessableContentError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 429: + raise TooManyRequestsError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 500: + raise InternalServerError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 400: + raise Error( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise commons_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # 
type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise commons_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise commons_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise commons_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response.text, + ) + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response_json, + ) + + async def list( + self, + *, + page: typing.Optional[int] = None, + limit: typing.Optional[int] = None, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncHttpResponse[EvaluationRules]: + """ + List evaluation rules in the authenticated project. + + Each item describes one live evaluation rule and its effective runtime status. + + Parameters + ---------- + page : typing.Optional[int] + 1-based page number. Defaults to `1`. + + limit : typing.Optional[int] + Maximum number of items per page. Defaults to `50`. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + AsyncHttpResponse[EvaluationRules] + """ + _response = await self._client_wrapper.httpx_client.request( + "api/public/unstable/evaluation-rules", + method="GET", + params={ + "page": page, + "limit": limit, + }, + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + EvaluationRules, + parse_obj_as( + type_=EvaluationRules, # type: ignore + object_=_response.json(), + ), + ) + return AsyncHttpResponse(response=_response, data=_data) + if _response.status_code == 400: + raise BadRequestError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise unstable_errors_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise unstable_errors_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 429: + raise TooManyRequestsError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 500: + raise InternalServerError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: 
ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 400: + raise Error( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise commons_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise commons_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise commons_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise commons_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response.text, + ) + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response_json, + ) + + async def get( + self, + evaluation_rule_id: str, + *, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncHttpResponse[EvaluationRule]: + """ + Get one evaluation rule by its identifier. + + Use this endpoint to inspect the current evaluator, target, mapping, filters, and effective runtime status. 
+ + Parameters + ---------- + evaluation_rule_id : str + Evaluation rule identifier returned by the evaluation rule endpoints. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + AsyncHttpResponse[EvaluationRule] + """ + _response = await self._client_wrapper.httpx_client.request( + f"api/public/unstable/evaluation-rules/{jsonable_encoder(evaluation_rule_id)}", + method="GET", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + EvaluationRule, + parse_obj_as( + type_=EvaluationRule, # type: ignore + object_=_response.json(), + ), + ) + return AsyncHttpResponse(response=_response, data=_data) + if _response.status_code == 400: + raise BadRequestError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise unstable_errors_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise unstable_errors_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise unstable_errors_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: 
ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 429: + raise TooManyRequestsError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 500: + raise InternalServerError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 400: + raise Error( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise commons_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise commons_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise commons_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise commons_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response.text, + ) + raise ApiError( + status_code=_response.status_code, 
+ headers=dict(_response.headers), + body=_response_json, + ) + + async def update( + self, + evaluation_rule_id: str, + *, + name: typing.Optional[str] = OMIT, + evaluator: typing.Optional[EvaluationRuleEvaluatorReference] = OMIT, + target: typing.Optional[EvaluationRuleTarget] = OMIT, + enabled: typing.Optional[bool] = OMIT, + sampling: typing.Optional[float] = OMIT, + filter: typing.Optional[typing.Sequence[EvaluationRuleFilter]] = OMIT, + mapping: typing.Optional[typing.Sequence[EvaluationRuleMapping]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncHttpResponse[EvaluationRule]: + """ + Update an evaluation rule. + + Typical uses: + - enable or disable live execution + - switch to another evaluator + - adjust sampling + - change filters + - update variable mappings + + Important behavior: + - provide only the fields you want to change + - if you provide `evaluator`, Langfuse resolves that evaluator family to its latest version before saving + - changing `target`, `filter`, or `mapping` must still produce a valid target-specific configuration + - if you change `target`, also send a compatible `filter` and `mapping` in the same request unless the existing ones are still valid for the new target + - if the resulting config is enabled, Langfuse re-validates that the selected evaluator can run + - if the update would move a non-active evaluation rule into the active state and the project already has 50 active evaluation rules, the API returns `409` + + Recovery guidance: + - if the update fails with `missing_variable_mapping` or `invalid_variable_mapping` after changing `evaluator` or `target`, resend the request with a complete new `mapping` + - if the update fails with `invalid_filter_value` after changing `target`, resend the request with a target-compatible `filter` + + Parameters + ---------- + evaluation_rule_id : str + Evaluation rule identifier. + + name : typing.Optional[str] + Updated deployment name. 
+ + evaluator : typing.Optional[EvaluationRuleEvaluatorReference] + Updated evaluator family. + + Langfuse resolves the provided evaluator family to its latest version before saving the rule. + + target : typing.Optional[EvaluationRuleTarget] + Updated target object type. + + enabled : typing.Optional[bool] + Updated desired enabled state. + + sampling : typing.Optional[float] + Updated sampling fraction. + + filter : typing.Optional[typing.Sequence[EvaluationRuleFilter]] + Updated filter list. + + For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names. + + mapping : typing.Optional[typing.Sequence[EvaluationRuleMapping]] + Updated variable mappings. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + AsyncHttpResponse[EvaluationRule] + """ + _response = await self._client_wrapper.httpx_client.request( + f"api/public/unstable/evaluation-rules/{jsonable_encoder(evaluation_rule_id)}", + method="PATCH", + json={ + "name": name, + "evaluator": convert_and_respect_annotation_metadata( + object_=evaluator, + annotation=EvaluationRuleEvaluatorReference, + direction="write", + ), + "target": target, + "enabled": enabled, + "sampling": sampling, + "filter": convert_and_respect_annotation_metadata( + object_=filter, + annotation=typing.Sequence[EvaluationRuleFilter], + direction="write", + ), + "mapping": convert_and_respect_annotation_metadata( + object_=mapping, + annotation=typing.Sequence[EvaluationRuleMapping], + direction="write", + ), + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + EvaluationRule, + parse_obj_as( + type_=EvaluationRule, # type: ignore + object_=_response.json(), + ), + ) + return AsyncHttpResponse(response=_response, data=_data) + if _response.status_code == 400: + raise BadRequestError( + headers=dict(_response.headers), + body=typing.cast( 
+ PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise unstable_errors_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise unstable_errors_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise unstable_errors_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 422: + raise UnprocessableContentError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 429: + raise TooManyRequestsError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 500: + raise InternalServerError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 400: + raise Error( + 
async def delete(
    self,
    evaluation_rule_id: str,
    *,
    request_options: typing.Optional[RequestOptions] = None,
) -> AsyncHttpResponse[DeleteEvaluationRuleResponse]:
    """
    Delete an evaluation rule.

    This removes the live-ingestion rule only. It does not delete the referenced evaluator.

    Parameters
    ----------
    evaluation_rule_id : str
        Evaluation rule identifier.

    request_options : typing.Optional[RequestOptions]
        Request-specific configuration.

    Returns
    -------
    AsyncHttpResponse[DeleteEvaluationRuleResponse]

    Raises
    ------
    BadRequestError
        The server answered with status 400.
    unstable_errors_errors_unauthorized_error_UnauthorizedError
        The server answered with status 401.
    unstable_errors_errors_access_denied_error_AccessDeniedError
        The server answered with status 403.
    unstable_errors_errors_not_found_error_NotFoundError
        The server answered with status 404.
    unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError
        The server answered with status 405.
    TooManyRequestsError
        The server answered with status 429.
    InternalServerError
        The server answered with status 500.
    ApiError
        Any other non-2xx status, or a response body that cannot be decoded as JSON.
    """
    _response = await self._client_wrapper.httpx_client.request(
        f"api/public/unstable/evaluation-rules/{jsonable_encoder(evaluation_rule_id)}",
        method="DELETE",
        request_options=request_options,
    )
    # Status codes whose body is parsed as PublicApiError. The previous
    # implementation re-checked 400/401/403/404/405 a second time after these
    # branches; those checks could never run because the first match always
    # raises, so they are dropped here.
    _typed_errors: typing.Dict[int, typing.Any] = {
        400: BadRequestError,
        401: unstable_errors_errors_unauthorized_error_UnauthorizedError,
        403: unstable_errors_errors_access_denied_error_AccessDeniedError,
        404: unstable_errors_errors_not_found_error_NotFoundError,
        405: unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError,
        429: TooManyRequestsError,
        500: InternalServerError,
    }
    try:
        if 200 <= _response.status_code < 300:
            _data = typing.cast(
                DeleteEvaluationRuleResponse,
                parse_obj_as(
                    type_=DeleteEvaluationRuleResponse,  # type: ignore
                    object_=_response.json(),
                ),
            )
            return AsyncHttpResponse(response=_response, data=_data)
        _error_type = _typed_errors.get(_response.status_code)
        if _error_type is not None:
            raise _error_type(
                headers=dict(_response.headers),
                body=typing.cast(
                    PublicApiError,
                    parse_obj_as(
                        type_=PublicApiError,  # type: ignore
                        object_=_response.json(),
                    ),
                ),
            )
        # Unmapped status: keep the decoded body for the generic error below.
        _response_json = _response.json()
    except JSONDecodeError:
        # Body was not JSON; surface the raw text instead.
        raise ApiError(
            status_code=_response.status_code,
            headers=dict(_response.headers),
            body=_response.text,
        )
    raise ApiError(
        status_code=_response.status_code,
        headers=dict(_response.headers),
        body=_response_json,
    )
def __getattr__(attr_name: str) -> typing.Any:
    """
    Resolve a lazily exported name on first attribute access.

    Looks ``attr_name`` up in ``_dynamic_imports``, imports the mapped
    submodule relative to this package and returns either the submodule
    itself (when the mapping is ``.<attr_name>``) or the attribute of the
    same name inside it.

    Raises ``AttributeError`` when the name is not registered or is missing
    from the imported submodule, and ``ImportError`` when the submodule
    cannot be imported.
    """
    target = _dynamic_imports.get(attr_name)
    if target is None:
        raise AttributeError(
            f"No {attr_name} found in _dynamic_imports for module name -> {__name__}"
        )
    try:
        loaded = import_module(target, __package__)
        # A mapping of ".<name>" means the submodule itself is the export.
        return loaded if target == f".{attr_name}" else getattr(loaded, attr_name)
    except ImportError as e:
        raise ImportError(f"Failed to import {attr_name} from {target}: {e}") from e
    except AttributeError as e:
        raise AttributeError(f"Failed to get {attr_name} from {target}: {e}") from e
class CreateEvaluationRuleRequest(UniversalBaseModel):
    """
    Request body for creating an evaluation rule.

    Checklist for agents and SDK clients:
    - reference an existing evaluator family by `evaluator.name` and `evaluator.scope`
    - choose `target=observation` or `target=experiment`
    - if `target=experiment` and you want a dataset filter, call `GET /api/public/v2/datasets` first and use dataset `id` values in `filter[].value`
    - fetch or inspect the evaluator first, then provide a complete variable mapping for every evaluator variable listed in `variables`
    - optionally narrow execution with `filter`
    - set `enabled=true` only when you want live execution immediately
    """

    # Required: declared with a bare pydantic.Field(), i.e. no default.
    name: str = pydantic.Field()
    """
    Human-readable deployment name.
    """

    # Required. Only the family reference (name + scope) is accepted here.
    evaluator: EvaluationRuleEvaluatorReference = pydantic.Field()
    """
    Evaluator family to use.

    Use `name` and `scope` from the evaluator endpoints.
    Langfuse resolves that family to its latest version before saving the rule.
    """

    # Required.
    target: EvaluationRuleTarget = pydantic.Field()
    """
    Target object type to evaluate.
    """

    # Required.
    enabled: bool = pydantic.Field()
    """
    Whether the deployment should be active immediately after creation.
    """

    # Optional: the model-side default is None; the `1` mentioned below is
    # the default described by the API documentation, not applied in this class.
    sampling: typing.Optional[float] = pydantic.Field(default=None)
    """
    Optional sampling fraction. Defaults to `1`.
    """

    # Optional: model-side default is None.
    filter: typing.Optional[typing.List[EvaluationRuleFilter]] = pydantic.Field(
        default=None
    )
    """
    Optional filter list.

    Omit or pass an empty list to evaluate all matching targets for the selected `target`.
    Each filter object must use a column that is valid for that `target`.
    For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
    """

    # Required even though it is declared after the optional fields.
    mapping: typing.List[EvaluationRuleMapping] = pydantic.Field()
    """
    Required variable mappings.

    Every evaluator variable must appear exactly once.
    Build this list from the evaluator `variables` array returned by the evaluator endpoints.
    """

    # pydantic config: extra="allow" keeps unknown keys instead of rejecting
    # them; frozen=True makes instances immutable after construction.
    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
        extra="allow", frozen=True
    )
class EvaluationRule(UniversalBaseModel):
    """
    Live evaluation rule for incoming data.

    An evaluation rule answers:
    - which evaluator should be used
    - which target objects should trigger scoring
    - how often scoring should run
    - which target fields should populate each evaluator variable
    - whether the deployment is active, inactive, or paused

    Important status semantics:
    - `enabled` is the desired on/off setting from the client
    - `status` is the effective runtime state after Langfuse applies validation and blocking rules
    - `enabled=true` with `status=paused` means the rule should run, but Langfuse has paused it until the underlying problem is fixed

    Examples
    --------
    import datetime

    from langfuse.unstable.commons import (
        EvaluationRuleFilter_StringOptions,
        EvaluationRuleMapping,
        EvaluationRuleMappingSource,
        EvaluationRuleOptionsFilterOperator,
        EvaluationRuleStatus,
        EvaluationRuleTarget,
        EvaluatorScope,
    )
    from langfuse.unstable.evaluation_rules import (
        EvaluationRule,
        EvaluationRuleEvaluator,
    )

    EvaluationRule(
        id="erule_123",
        name="answer-correctness-live",
        evaluator=EvaluationRuleEvaluator(
            id="evaltmpl_123",
            name="answer-correctness",
            scope=EvaluatorScope.PROJECT,
        ),
        target=EvaluationRuleTarget.OBSERVATION,
        enabled=True,
        status=EvaluationRuleStatus.ACTIVE,
        sampling=1.0,
        filter=[
            EvaluationRuleFilter_StringOptions(
                column="type",
                operator=EvaluationRuleOptionsFilterOperator.ANY_OF,
                value=["GENERATION"],
            )
        ],
        mapping=[
            EvaluationRuleMapping(
                variable="input",
                source=EvaluationRuleMappingSource.INPUT,
            ),
            EvaluationRuleMapping(
                variable="output",
                source=EvaluationRuleMappingSource.OUTPUT,
            ),
        ],
        created_at=datetime.datetime.fromisoformat(
            "2026-03-30 09:20:00+00:00",
        ),
        updated_at=datetime.datetime.fromisoformat(
            "2026-03-30 09:20:00+00:00",
        ),
    )
    """

    # Required fields below use a bare pydantic.Field(), i.e. no default.
    id: str = pydantic.Field()
    """
    Stable evaluation rule identifier.
    """

    name: str = pydantic.Field()
    """
    Human-readable deployment name. This is independent from the evaluator name.
    """

    # Resolved evaluator (includes the concrete version `id`), unlike the
    # name/scope reference used in the create and update request bodies.
    evaluator: EvaluationRuleEvaluator = pydantic.Field()
    """
    Evaluator currently used by this rule.

    `name` and `scope` identify the evaluator family conceptually.
    `id` is the currently active evaluator version in that family.
    If you create a newer project version with the same evaluator name later, existing evaluation rules are moved to it automatically.
    """

    target: EvaluationRuleTarget = pydantic.Field()
    """
    Target object type that should trigger scoring.
    """

    enabled: bool = pydantic.Field()
    """
    Desired enabled state configured by the client.
    """

    status: EvaluationRuleStatus = pydantic.Field()
    """
    Effective runtime status after Langfuse applies validation and blocking rules.
    """

    # Optional, defaults to None. FieldMetadata(alias=...) attaches the
    # camelCase name `pausedReason`; presumably the core serialization
    # helpers use it as the wire key -- confirm in core/serialization.
    paused_reason: typing_extensions.Annotated[
        typing.Optional[str], FieldMetadata(alias="pausedReason")
    ] = pydantic.Field(default=None)
    """
    Machine-readable reason when `status=paused`, otherwise `null`.
    """

    # Optional, defaults to None; aliased to `pausedMessage`.
    paused_message: typing_extensions.Annotated[
        typing.Optional[str], FieldMetadata(alias="pausedMessage")
    ] = pydantic.Field(default=None)
    """
    Human-readable explanation when `status=paused`, otherwise `null`.
    """

    # Required here, whereas the request models declare sampling as optional.
    sampling: float = pydantic.Field()
    """
    Fraction of matching target objects that should be evaluated.

    Must be greater than `0` and less than or equal to `1`.
    - `1` means evaluate every matching target.
    - `0.25` means evaluate approximately 25% of matching targets.
    """

    filter: typing.List[EvaluationRuleFilter] = pydantic.Field()
    """
    List of filter conditions used to decide whether a target should be evaluated.
    """

    mapping: typing.List[EvaluationRuleMapping] = pydantic.Field()
    """
    Variable mappings used to populate the evaluator prompt from the live target object.
    """

    # Required; aliased to `createdAt`.
    created_at: typing_extensions.Annotated[
        dt.datetime, FieldMetadata(alias="createdAt")
    ] = pydantic.Field()
    """
    Timestamp when the evaluation rule was created.
    """

    # Required; aliased to `updatedAt`.
    updated_at: typing_extensions.Annotated[
        dt.datetime, FieldMetadata(alias="updatedAt")
    ] = pydantic.Field()
    """
    Timestamp when the evaluation rule was last updated.
    """

    # pydantic config: extra="allow" keeps unknown keys instead of rejecting
    # them; frozen=True makes instances immutable after construction.
    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
        extra="allow", frozen=True
    )
class EvaluationRuleEvaluatorReference(UniversalBaseModel):
    """
    Evaluator family reference used when creating or updating an evaluation rule.

    `name` and `scope` are enough to identify the evaluator family in the authenticated project context.
    """

    # Required: declared with a bare pydantic.Field(), i.e. no default.
    name: str = pydantic.Field()
    """
    Evaluator family name.
    """

    # Required.
    scope: EvaluatorScope = pydantic.Field()
    """
    Whether the evaluator family is project-owned or Langfuse-managed.
    """

    # pydantic config: extra="allow" keeps unknown keys instead of rejecting
    # them; frozen=True makes instances immutable after construction.
    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
        extra="allow", frozen=True
    )
class UpdateEvaluationRuleRequest(UniversalBaseModel):
    """
    Partial update body for an evaluation rule.

    Provide only the fields you want to change.
    An empty body is rejected.

    Practical guidance:
    - If you only want to rename the rule or change sampling, send just those fields.
    - If you change `evaluator`, send a fresh `mapping` unless you are certain the existing mapping still matches the evaluator variables.
    - If you change `target`, usually send both `filter` and `mapping` in the same request.
    - If you change an experiment `datasetId` filter, call `GET /api/public/v2/datasets` and use dataset `id` values from that response.
    """

    # Every field in this model is optional and defaults to None.
    # NOTE(review): how a None field is serialized (omitted vs. sent as null)
    # is decided outside this class -- confirm against the client code.
    name: typing.Optional[str] = pydantic.Field(default=None)
    """
    Updated deployment name.
    """

    evaluator: typing.Optional[EvaluationRuleEvaluatorReference] = pydantic.Field(
        default=None
    )
    """
    Updated evaluator family.

    Langfuse resolves the provided evaluator family to its latest version before saving the rule.
    """

    target: typing.Optional[EvaluationRuleTarget] = pydantic.Field(default=None)
    """
    Updated target object type.
    """

    enabled: typing.Optional[bool] = pydantic.Field(default=None)
    """
    Updated desired enabled state.
    """

    sampling: typing.Optional[float] = pydantic.Field(default=None)
    """
    Updated sampling fraction.
    """

    filter: typing.Optional[typing.List[EvaluationRuleFilter]] = pydantic.Field(
        default=None
    )
    """
    Updated filter list.

    For `target=experiment`, `column=datasetId` expects dataset `id` values from `GET /api/public/v2/datasets`, not dataset names.
    """

    mapping: typing.Optional[typing.List[EvaluationRuleMapping]] = pydantic.Field(
        default=None
    )
    """
    Updated variable mappings.
    """

    # pydantic config: extra="allow" keeps unknown keys instead of rejecting
    # them; frozen=True makes instances immutable after construction.
    model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(
        extra="allow", frozen=True
    )
def __getattr__(attr_name: str) -> typing.Any:
    """
    Resolve a lazily exported name on first attribute access.

    Looks ``attr_name`` up in ``_dynamic_imports``, imports the mapped
    submodule relative to this package and returns either the submodule
    itself (when the mapping is ``.<attr_name>``) or the attribute of the
    same name inside it.

    Raises ``AttributeError`` when the name is not registered or is missing
    from the imported submodule, and ``ImportError`` when the submodule
    cannot be imported.
    """
    target = _dynamic_imports.get(attr_name)
    if target is None:
        raise AttributeError(
            f"No {attr_name} found in _dynamic_imports for module name -> {__name__}"
        )
    try:
        loaded = import_module(target, __package__)
        # A mapping of ".<name>" means the submodule itself is the export.
        return loaded if target == f".{attr_name}" else getattr(loaded, attr_name)
    except ImportError as e:
        raise ImportError(f"Failed to import {attr_name} from {target}: {e}") from e
    except AttributeError as e:
        raise AttributeError(f"Failed to get {attr_name} from {target}: {e}") from e


def __dir__():
    """Return the lazily exported names in sorted order."""
    return sorted(_dynamic_imports)
+ + +class EvaluatorsClient: + def __init__(self, *, client_wrapper: SyncClientWrapper): + self._raw_client = RawEvaluatorsClient(client_wrapper=client_wrapper) + + @property + def with_raw_response(self) -> RawEvaluatorsClient: + """ + Retrieves a raw implementation of this client that returns raw responses. + + Returns + ------- + RawEvaluatorsClient + """ + return self._raw_client + + def create( + self, + *, + name: str, + prompt: str, + output_definition: EvaluatorOutputDefinition, + model_config: typing.Optional[EvaluatorModelConfig] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> Evaluator: + """ + Create an evaluator in the authenticated project. + + Use evaluators to define **how** Langfuse should score data: the prompt, the expected structured output, and the optional model configuration. + + Naming behavior: + - If this is a new evaluator name in your project, Langfuse creates version `1`. + - If the name already exists in your project, Langfuse creates the next version and returns it. + - When a new project version is created, existing evaluation rules in that project automatically move to the newest version for that evaluator name. + + Recommended workflow: + 1. Create the evaluator. + 2. Read the returned `variables` array. + 3. Read the returned `outputDefinition.dataType` so the client knows whether future scores will be numeric, boolean, or categorical. + 4. Create one or more evaluation rules that reference the returned evaluator family using `name` and `scope`. + + Recovery guidance: + - `422` with `code=evaluator_preflight_failed`: the evaluator cannot run with the resolved model configuration. Add a valid explicit `modelConfig`, or configure the project's default evaluation model, then retry the same request. + - `400` with `code=invalid_body`: the request shape is malformed. Use the structured `details.issues` array to fix the specific fields and retry. 
+ - `400` with `code=invalid_body` on `outputDefinition`: send `dataType`, `reasoning.description`, and `score.description`. Do not send `version`; it is not part of the public request shape. + + Unstable API note: + - This surface may evolve while the underlying evaluation data model is being redesigned. + + Parameters + ---------- + name : str + Evaluator name within the authenticated project. + + prompt : str + Prompt template used by the evaluator. + + output_definition : EvaluatorOutputDefinition + Structured output schema the evaluator must return. + + Always send `dataType`. + Do not send `version`; it is an internal storage detail and not part of the public request contract. + + model_config : typing.Optional[EvaluatorModelConfig] + Optional explicit model configuration. Omit or set to `null` to use the project default evaluation model. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + Evaluator + + Examples + -------- + from langfuse import LangfuseAPI + from langfuse.unstable.commons import ( + EvaluatorModelConfig, + EvaluatorOutputDataType, + EvaluatorOutputDefinition_Numeric, + EvaluatorOutputFieldDefinition, + ) + + client = LangfuseAPI( + x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME", + x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION", + x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY", + username="YOUR_USERNAME", + password="YOUR_PASSWORD", + base_url="https://yourhost.com/path/to/api", + ) + client.unstable.evaluators.create( + name="answer-correctness", + prompt="You are grading an answer.\n\nInput:\n{{input}}\n\nOutput:\n{{output}}\n\nReturn a score between 0 and 1.\n", + output_definition=EvaluatorOutputDefinition_Numeric( + data_type=EvaluatorOutputDataType.NUMERIC, + reasoning=EvaluatorOutputFieldDefinition( + description="Explain why the score was assigned.", + ), + score=EvaluatorOutputFieldDefinition( + description="Correctness score between 0 and 1.", + ), + ), + 
model_config=EvaluatorModelConfig( + provider="openai", + model="gpt-4.1-mini", + ), + ) + """ + _response = self._raw_client.create( + name=name, + prompt=prompt, + output_definition=output_definition, + model_config=model_config, + request_options=request_options, + ) + return _response.data + + def list( + self, + *, + page: typing.Optional[int] = None, + limit: typing.Optional[int] = None, + request_options: typing.Optional[RequestOptions] = None, + ) -> Evaluators: + """ + List the evaluators available to the authenticated project. + + Important behavior: + - This endpoint returns the latest version of each available evaluator. + - Results can include evaluators from your project and Langfuse-managed evaluators. + - If the same evaluator name exists in both places, both are returned as separate items with different `scope` values. + + Parameters + ---------- + page : typing.Optional[int] + 1-based page number. Defaults to `1`. + + limit : typing.Optional[int] + Maximum number of items per page. Defaults to `50`. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + Evaluators + + Examples + -------- + from langfuse import LangfuseAPI + + client = LangfuseAPI( + x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME", + x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION", + x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY", + username="YOUR_USERNAME", + password="YOUR_PASSWORD", + base_url="https://yourhost.com/path/to/api", + ) + client.unstable.evaluators.list() + """ + _response = self._raw_client.list( + page=page, limit=limit, request_options=request_options + ) + return _response.data + + def get( + self, + evaluator_id: str, + *, + request_options: typing.Optional[RequestOptions] = None, + ) -> Evaluator: + """ + Get one evaluator by `id`. + + Use this endpoint when you want the prompt, output definition, model configuration, and derived variables for the evaluator you plan to use in an evaluation rule. 
class AsyncEvaluatorsClient:
    """
    Asynchronous client for the unstable evaluators endpoints.

    Every coroutine delegates to an `AsyncRawEvaluatorsClient` and returns only the
    parsed `data` of the raw response. Use `with_raw_response` when the raw
    response object itself is needed.
    """

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        self._raw_client = AsyncRawEvaluatorsClient(client_wrapper=client_wrapper)

    @property
    def with_raw_response(self) -> AsyncRawEvaluatorsClient:
        """
        Expose the underlying raw client, whose methods return raw responses.

        Returns
        -------
        AsyncRawEvaluatorsClient
        """
        return self._raw_client

    async def create(
        self,
        *,
        name: str,
        prompt: str,
        output_definition: EvaluatorOutputDefinition,
        model_config: typing.Optional[EvaluatorModelConfig] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> Evaluator:
        """
        Create an evaluator in the authenticated project.

        An evaluator describes **how** Langfuse scores data: the prompt, the
        structured output it must produce, and an optional model configuration.

        Naming behavior:
        - A name that is new to the project creates version `1`.
        - A name that already exists in the project creates and returns the next version.
        - Creating a new project version automatically moves the project's existing
          evaluation rules for that evaluator name to the newest version.

        Recommended workflow:
        1. Create the evaluator.
        2. Read the returned `variables` array.
        3. Read the returned `outputDefinition.dataType` to learn whether future
           scores will be numeric, boolean, or categorical.
        4. Create one or more evaluation rules that reference the returned evaluator
           family using `name` and `scope`.

        Recovery guidance:
        - `422` with `code=evaluator_preflight_failed`: the evaluator cannot run with
          the resolved model configuration. Add a valid explicit `modelConfig`, or
          configure the project's default evaluation model, then retry the same request.
        - `400` with `code=invalid_body`: the request shape is malformed. Use the
          structured `details.issues` array to fix the specific fields and retry.
        - `400` with `code=invalid_body` on `outputDefinition`: send `dataType`,
          `reasoning.description`, and `score.description`. Do not send `version`;
          it is not part of the public request shape.

        Unstable API note:
        - This surface may evolve while the underlying evaluation data model is
          being redesigned.

        Parameters
        ----------
        name : str
            Evaluator name within the authenticated project.

        prompt : str
            Prompt template used by the evaluator.

        output_definition : EvaluatorOutputDefinition
            Structured output schema the evaluator must return.

            Always send `dataType`.
            Do not send `version`; it is an internal storage detail and not part of
            the public request contract.

        model_config : typing.Optional[EvaluatorModelConfig]
            Optional explicit model configuration. Omit or set to `null` to use the
            project default evaluation model.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        Evaluator

        Examples
        --------
        import asyncio

        from langfuse import AsyncLangfuseAPI
        from langfuse.unstable.commons import (
            EvaluatorModelConfig,
            EvaluatorOutputDataType,
            EvaluatorOutputDefinition_Numeric,
            EvaluatorOutputFieldDefinition,
        )

        client = AsyncLangfuseAPI(
            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
            username="YOUR_USERNAME",
            password="YOUR_PASSWORD",
            base_url="https://yourhost.com/path/to/api",
        )


        async def main() -> None:
            await client.unstable.evaluators.create(
                name="answer-correctness",
                prompt="Grade the answer. Input: {{input}} Output: {{output}}",
                output_definition=EvaluatorOutputDefinition_Numeric(
                    data_type=EvaluatorOutputDataType.NUMERIC,
                    reasoning=EvaluatorOutputFieldDefinition(
                        description="Explain why the score was assigned.",
                    ),
                    score=EvaluatorOutputFieldDefinition(
                        description="Correctness score between 0 and 1.",
                    ),
                ),
                model_config=EvaluatorModelConfig(
                    provider="openai",
                    model="gpt-4.1-mini",
                ),
            )


        asyncio.run(main())
        """
        raw_result = await self._raw_client.create(
            name=name,
            prompt=prompt,
            output_definition=output_definition,
            model_config=model_config,
            request_options=request_options,
        )
        return raw_result.data

    async def list(
        self,
        *,
        page: typing.Optional[int] = None,
        limit: typing.Optional[int] = None,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> Evaluators:
        """
        Return one page of the evaluators visible to the authenticated project.

        Behavior to be aware of:
        - Only the latest version of each available evaluator is returned.
        - The page may mix evaluators owned by your project with Langfuse-managed ones.
        - When the same evaluator name exists in both places, each one is returned
          as its own item and the two differ in their `scope` value.

        Parameters
        ----------
        page : typing.Optional[int]
            1-based page number. Defaults to `1`.

        limit : typing.Optional[int]
            Maximum number of items per page. Defaults to `50`.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        Evaluators

        Examples
        --------
        import asyncio

        from langfuse import AsyncLangfuseAPI

        client = AsyncLangfuseAPI(
            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
            username="YOUR_USERNAME",
            password="YOUR_PASSWORD",
            base_url="https://yourhost.com/path/to/api",
        )


        async def main() -> None:
            await client.unstable.evaluators.list()


        asyncio.run(main())
        """
        raw_result = await self._raw_client.list(
            page=page,
            limit=limit,
            request_options=request_options,
        )
        return raw_result.data

    async def get(
        self,
        evaluator_id: str,
        *,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> Evaluator:
        """
        Fetch a single evaluator by its `id`.

        Use this when you need the prompt, output definition, model configuration,
        and derived variables of the evaluator you plan to reference from an
        evaluation rule.

        Parameters
        ----------
        evaluator_id : str
            Evaluator identifier returned by the evaluator endpoints.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        Evaluator

        Examples
        --------
        import asyncio

        from langfuse import AsyncLangfuseAPI

        client = AsyncLangfuseAPI(
            x_langfuse_sdk_name="YOUR_X_LANGFUSE_SDK_NAME",
            x_langfuse_sdk_version="YOUR_X_LANGFUSE_SDK_VERSION",
            x_langfuse_public_key="YOUR_X_LANGFUSE_PUBLIC_KEY",
            username="YOUR_USERNAME",
            password="YOUR_PASSWORD",
            base_url="https://yourhost.com/path/to/api",
        )


        async def main() -> None:
            await client.unstable.evaluators.get(
                evaluator_id="evaluatorId",
            )


        asyncio.run(main())
        """
        raw_result = await self._raw_client.get(
            evaluator_id,
            request_options=request_options,
        )
        return raw_result.data
class RawEvaluatorsClient:
    """
    Synchronous raw client for the unstable evaluators endpoints.

    Each public method issues one HTTP request through the client wrapper and
    returns an `HttpResponse` holding both the underlying response and the parsed
    model. Non-2xx responses are translated into typed errors by
    `_parse_or_raise`, driven by the per-endpoint status tables below.

    The generated code used to repeat the 400/401/403/405 checks (and, for `get`,
    the 404 check) a second time with the commons error classes. Those repeats sat
    after checks that always raise for the same status code, so they could never
    execute; they are intentionally not reproduced in the tables.
    """

    # Status codes handled identically by every endpoint. Each entry maps a status
    # code to (error class to raise, type the JSON error body is parsed into).
    _PUBLIC_API_ERRORS: typing.Dict[int, typing.Tuple[typing.Any, typing.Any]] = {
        400: (BadRequestError, PublicApiError),
        401: (
            unstable_errors_errors_unauthorized_error_UnauthorizedError,
            PublicApiError,
        ),
        403: (
            unstable_errors_errors_access_denied_error_AccessDeniedError,
            PublicApiError,
        ),
        405: (
            unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError,
            PublicApiError,
        ),
        429: (TooManyRequestsError, PublicApiError),
        500: (InternalServerError, PublicApiError),
    }

    # `list` has no PublicApiError-typed 404, so a 404 keeps raising the commons
    # NotFoundError with an untyped body.
    _LIST_ERRORS: typing.Dict[int, typing.Tuple[typing.Any, typing.Any]] = {
        **_PUBLIC_API_ERRORS,
        404: (commons_errors_not_found_error_NotFoundError, typing.Any),
    }

    # `create` handles 404 like `list` and additionally maps 409 and 422.
    _CREATE_ERRORS: typing.Dict[int, typing.Tuple[typing.Any, typing.Any]] = {
        **_LIST_ERRORS,
        409: (ConflictError, PublicApiError),
        422: (UnprocessableContentError, PublicApiError),
    }

    # `get` maps 404 to the unstable NotFoundError with a PublicApiError body.
    _GET_ERRORS: typing.Dict[int, typing.Tuple[typing.Any, typing.Any]] = {
        **_PUBLIC_API_ERRORS,
        404: (
            unstable_errors_errors_not_found_error_NotFoundError,
            PublicApiError,
        ),
    }

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        self._client_wrapper = client_wrapper

    @staticmethod
    def _parse_or_raise(
        _response: typing.Any,
        *,
        success_type: typing.Any,
        error_types: typing.Dict[int, typing.Tuple[typing.Any, typing.Any]],
    ) -> typing.Any:
        """
        Parse a successful response body or raise the error mapped to its status.

        Parameters
        ----------
        _response : typing.Any
            Response returned by the wrapped HTTP client. Only `status_code`,
            `headers`, `json()` and `text` are read.

        success_type : typing.Any
            Type the JSON body is parsed into when the status code is 2xx.

        error_types : typing.Dict[int, typing.Tuple[typing.Any, typing.Any]]
            Maps a status code to the error class to raise and the type its JSON
            body is parsed into.

        Returns
        -------
        typing.Any
            The body parsed as `success_type` (2xx responses only).

        Raises
        ------
        ApiError
            When the status code is neither 2xx nor present in `error_types`
            (body is the decoded JSON), or when the body is not valid JSON (body
            is the raw response text).
        """
        try:
            if 200 <= _response.status_code < 300:
                return parse_obj_as(
                    type_=success_type,  # type: ignore
                    object_=_response.json(),
                )
            mapped_error = error_types.get(_response.status_code)
            if mapped_error is not None:
                error_cls, body_type = mapped_error
                raise error_cls(
                    headers=dict(_response.headers),
                    body=parse_obj_as(
                        type_=body_type,  # type: ignore
                        object_=_response.json(),
                    ),
                )
            _response_json = _response.json()
        except JSONDecodeError:
            # Any `_response.json()` call above can fail on a non-JSON body; fall
            # back to the raw text so the caller still sees what the server sent.
            raise ApiError(
                status_code=_response.status_code,
                headers=dict(_response.headers),
                body=_response.text,
            )
        raise ApiError(
            status_code=_response.status_code,
            headers=dict(_response.headers),
            body=_response_json,
        )

    def create(
        self,
        *,
        name: str,
        prompt: str,
        output_definition: EvaluatorOutputDefinition,
        model_config: typing.Optional[EvaluatorModelConfig] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> HttpResponse[Evaluator]:
        """
        Create an evaluator in the authenticated project.

        Use evaluators to define **how** Langfuse should score data: the prompt, the
        expected structured output, and the optional model configuration.

        Naming behavior:
        - If this is a new evaluator name in your project, Langfuse creates version `1`.
        - If the name already exists in your project, Langfuse creates the next
          version and returns it.
        - When a new project version is created, existing evaluation rules in that
          project automatically move to the newest version for that evaluator name.

        Recommended workflow:
        1. Create the evaluator.
        2. Read the returned `variables` array.
        3. Read the returned `outputDefinition.dataType` so the client knows whether
           future scores will be numeric, boolean, or categorical.
        4. Create one or more evaluation rules that reference the returned evaluator
           family using `name` and `scope`.

        Recovery guidance:
        - `422` with `code=evaluator_preflight_failed`: the evaluator cannot run with
          the resolved model configuration. Add a valid explicit `modelConfig`, or
          configure the project's default evaluation model, then retry the same request.
        - `400` with `code=invalid_body`: the request shape is malformed. Use the
          structured `details.issues` array to fix the specific fields and retry.
        - `400` with `code=invalid_body` on `outputDefinition`: send `dataType`,
          `reasoning.description`, and `score.description`. Do not send `version`;
          it is not part of the public request shape.

        Unstable API note:
        - This surface may evolve while the underlying evaluation data model is
          being redesigned.

        Parameters
        ----------
        name : str
            Evaluator name within the authenticated project.

        prompt : str
            Prompt template used by the evaluator.

        output_definition : EvaluatorOutputDefinition
            Structured output schema the evaluator must return.

            Always send `dataType`.
            Do not send `version`; it is an internal storage detail and not part of
            the public request contract.

        model_config : typing.Optional[EvaluatorModelConfig]
            Optional explicit model configuration. Omit or set to `null` to use the
            project default evaluation model.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        HttpResponse[Evaluator]

        Raises
        ------
        BadRequestError, UnauthorizedError, AccessDeniedError, MethodNotAllowedError,
        ConflictError, UnprocessableContentError, TooManyRequestsError, InternalServerError
            For status 400, 401, 403, 405, 409, 422, 429 and 500 respectively, each
            carrying a `PublicApiError` body.
        NotFoundError
            The commons variant, for status 404.
        ApiError
            For any other non-2xx status, or when the body is not valid JSON.
        """
        _response = self._client_wrapper.httpx_client.request(
            "api/public/unstable/evaluators",
            method="POST",
            json={
                "name": name,
                "prompt": prompt,
                "outputDefinition": convert_and_respect_annotation_metadata(
                    object_=output_definition,
                    annotation=EvaluatorOutputDefinition,
                    direction="write",
                ),
                "modelConfig": convert_and_respect_annotation_metadata(
                    object_=model_config,
                    annotation=typing.Optional[EvaluatorModelConfig],
                    direction="write",
                ),
            },
            request_options=request_options,
            omit=OMIT,
        )
        _data = typing.cast(
            Evaluator,
            self._parse_or_raise(
                _response,
                success_type=Evaluator,
                error_types=self._CREATE_ERRORS,
            ),
        )
        return HttpResponse(response=_response, data=_data)

    def list(
        self,
        *,
        page: typing.Optional[int] = None,
        limit: typing.Optional[int] = None,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> HttpResponse[Evaluators]:
        """
        List the evaluators available to the authenticated project.

        Important behavior:
        - This endpoint returns the latest version of each available evaluator.
        - Results can include evaluators from your project and Langfuse-managed evaluators.
        - If the same evaluator name exists in both places, both are returned as
          separate items with different `scope` values.

        Parameters
        ----------
        page : typing.Optional[int]
            1-based page number. Defaults to `1`.

        limit : typing.Optional[int]
            Maximum number of items per page. Defaults to `50`.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        HttpResponse[Evaluators]

        Raises
        ------
        BadRequestError, UnauthorizedError, AccessDeniedError, MethodNotAllowedError,
        TooManyRequestsError, InternalServerError
            For status 400, 401, 403, 405, 429 and 500 respectively, each carrying a
            `PublicApiError` body.
        NotFoundError
            The commons variant, for status 404.
        ApiError
            For any other non-2xx status, or when the body is not valid JSON.
        """
        _response = self._client_wrapper.httpx_client.request(
            "api/public/unstable/evaluators",
            method="GET",
            params={
                "page": page,
                "limit": limit,
            },
            request_options=request_options,
        )
        _data = typing.cast(
            Evaluators,
            self._parse_or_raise(
                _response,
                success_type=Evaluators,
                error_types=self._LIST_ERRORS,
            ),
        )
        return HttpResponse(response=_response, data=_data)

    def get(
        self,
        evaluator_id: str,
        *,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> HttpResponse[Evaluator]:
        """
        Get one evaluator by `id`.

        Use this endpoint when you want the prompt, output definition, model
        configuration, and derived variables for the evaluator you plan to use in an
        evaluation rule.

        Parameters
        ----------
        evaluator_id : str
            Evaluator identifier returned by the evaluator endpoints.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        HttpResponse[Evaluator]

        Raises
        ------
        BadRequestError, UnauthorizedError, AccessDeniedError, NotFoundError,
        MethodNotAllowedError, TooManyRequestsError, InternalServerError
            For status 400, 401, 403, 404, 405, 429 and 500 respectively, each
            carrying a `PublicApiError` body.
        ApiError
            For any other non-2xx status, or when the body is not valid JSON.
        """
        _response = self._client_wrapper.httpx_client.request(
            f"api/public/unstable/evaluators/{jsonable_encoder(evaluator_id)}",
            method="GET",
            request_options=request_options,
        )
        _data = typing.cast(
            Evaluator,
            self._parse_or_raise(
                _response,
                success_type=Evaluator,
                error_types=self._GET_ERRORS,
            ),
        )
        return HttpResponse(response=_response, data=_data)
+ + Unstable API note: + - This surface may evolve while the underlying evaluation data model is being redesigned. + + Parameters + ---------- + name : str + Evaluator name within the authenticated project. + + prompt : str + Prompt template used by the evaluator. + + output_definition : EvaluatorOutputDefinition + Structured output schema the evaluator must return. + + Always send `dataType`. + Do not send `version`; it is an internal storage detail and not part of the public request contract. + + model_config : typing.Optional[EvaluatorModelConfig] + Optional explicit model configuration. Omit or set to `null` to use the project default evaluation model. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + AsyncHttpResponse[Evaluator] + """ + _response = await self._client_wrapper.httpx_client.request( + "api/public/unstable/evaluators", + method="POST", + json={ + "name": name, + "prompt": prompt, + "outputDefinition": convert_and_respect_annotation_metadata( + object_=output_definition, + annotation=EvaluatorOutputDefinition, + direction="write", + ), + "modelConfig": convert_and_respect_annotation_metadata( + object_=model_config, + annotation=typing.Optional[EvaluatorModelConfig], + direction="write", + ), + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + Evaluator, + parse_obj_as( + type_=Evaluator, # type: ignore + object_=_response.json(), + ), + ) + return AsyncHttpResponse(response=_response, data=_data) + if _response.status_code == 400: + raise BadRequestError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise unstable_errors_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + 
parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise unstable_errors_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 409: + raise ConflictError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 422: + raise UnprocessableContentError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 429: + raise TooManyRequestsError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 500: + raise InternalServerError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 400: + raise Error( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise commons_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + 
type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise commons_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise commons_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise commons_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response.text, + ) + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response_json, + ) + + async def list( + self, + *, + page: typing.Optional[int] = None, + limit: typing.Optional[int] = None, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncHttpResponse[Evaluators]: + """ + List the evaluators available to the authenticated project. + + Important behavior: + - This endpoint returns the latest version of each available evaluator. + - Results can include evaluators from your project and Langfuse-managed evaluators. + - If the same evaluator name exists in both places, both are returned as separate items with different `scope` values. + + Parameters + ---------- + page : typing.Optional[int] + 1-based page number. Defaults to `1`. + + limit : typing.Optional[int] + Maximum number of items per page. Defaults to `50`. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + AsyncHttpResponse[Evaluators] + """ + _response = await self._client_wrapper.httpx_client.request( + "api/public/unstable/evaluators", + method="GET", + params={ + "page": page, + "limit": limit, + }, + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + Evaluators, + parse_obj_as( + type_=Evaluators, # type: ignore + object_=_response.json(), + ), + ) + return AsyncHttpResponse(response=_response, data=_data) + if _response.status_code == 400: + raise BadRequestError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise unstable_errors_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise unstable_errors_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 429: + raise TooManyRequestsError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 500: + raise InternalServerError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + 
object_=_response.json(), + ), + ), + ) + if _response.status_code == 400: + raise Error( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise commons_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise commons_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise commons_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise commons_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response.text, + ) + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response_json, + ) + + async def get( + self, + evaluator_id: str, + *, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncHttpResponse[Evaluator]: + """ + Get one evaluator by `id`. + + Use this endpoint when you want the prompt, output definition, model configuration, and derived variables for the evaluator you plan to use in an evaluation rule. 
+ + Parameters + ---------- + evaluator_id : str + Evaluator identifier returned by the evaluator endpoints. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + AsyncHttpResponse[Evaluator] + """ + _response = await self._client_wrapper.httpx_client.request( + f"api/public/unstable/evaluators/{jsonable_encoder(evaluator_id)}", + method="GET", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + Evaluator, + parse_obj_as( + type_=Evaluator, # type: ignore + object_=_response.json(), + ), + ) + return AsyncHttpResponse(response=_response, data=_data) + if _response.status_code == 400: + raise BadRequestError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise unstable_errors_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise unstable_errors_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise unstable_errors_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise unstable_errors_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), 
+ ) + if _response.status_code == 429: + raise TooManyRequestsError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 500: + raise InternalServerError( + headers=dict(_response.headers), + body=typing.cast( + PublicApiError, + parse_obj_as( + type_=PublicApiError, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 400: + raise Error( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise commons_errors_unauthorized_error_UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 403: + raise commons_errors_access_denied_error_AccessDeniedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 405: + raise commons_errors_method_not_allowed_error_MethodNotAllowedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise commons_errors_not_found_error_NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + typing.Any, + parse_obj_as( + type_=typing.Any, # type: ignore + object_=_response.json(), + ), + ), + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + body=_response.text, + ) + raise ApiError( + status_code=_response.status_code, + headers=dict(_response.headers), + 
body=_response_json, + ) diff --git a/langfuse/api/unstable/evaluators/types/__init__.py b/langfuse/api/unstable/evaluators/types/__init__.py new file mode 100644 index 000000000..6e7a13233 --- /dev/null +++ b/langfuse/api/unstable/evaluators/types/__init__.py @@ -0,0 +1,46 @@ +# This file was auto-generated by Fern from our API Definition. + +# isort: skip_file + +import typing +from importlib import import_module + +if typing.TYPE_CHECKING: + from .create_evaluator_request import CreateEvaluatorRequest + from .evaluator import Evaluator + from .evaluators import Evaluators +_dynamic_imports: typing.Dict[str, str] = { + "CreateEvaluatorRequest": ".create_evaluator_request", + "Evaluator": ".evaluator", + "Evaluators": ".evaluators", +} + + +def __getattr__(attr_name: str) -> typing.Any: + module_name = _dynamic_imports.get(attr_name) + if module_name is None: + raise AttributeError( + f"No {attr_name} found in _dynamic_imports for module name -> {__name__}" + ) + try: + module = import_module(module_name, __package__) + if module_name == f".{attr_name}": + return module + else: + return getattr(module, attr_name) + except ImportError as e: + raise ImportError( + f"Failed to import {attr_name} from {module_name}: {e}" + ) from e + except AttributeError as e: + raise AttributeError( + f"Failed to get {attr_name} from {module_name}: {e}" + ) from e + + +def __dir__(): + lazy_attrs = list(_dynamic_imports.keys()) + return sorted(lazy_attrs) + + +__all__ = ["CreateEvaluatorRequest", "Evaluator", "Evaluators"] diff --git a/langfuse/api/unstable/evaluators/types/create_evaluator_request.py b/langfuse/api/unstable/evaluators/types/create_evaluator_request.py new file mode 100644 index 000000000..7616d99ee --- /dev/null +++ b/langfuse/api/unstable/evaluators/types/create_evaluator_request.py @@ -0,0 +1,50 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +import pydantic +import typing_extensions +from ....core.pydantic_utilities import UniversalBaseModel +from ....core.serialization import FieldMetadata +from ...commons.types.evaluator_model_config import EvaluatorModelConfig +from ...commons.types.evaluator_output_definition import EvaluatorOutputDefinition + + +class CreateEvaluatorRequest(UniversalBaseModel): + """ + Request body for creating an evaluator. + + If the same `name` already exists in your project, Langfuse creates the next version and returns it. + Existing evaluation rules in the same project are then moved to that new latest version automatically. + """ + + name: str = pydantic.Field() + """ + Evaluator name within the authenticated project. + """ + + prompt: str = pydantic.Field() + """ + Prompt template used by the evaluator. + """ + + output_definition: typing_extensions.Annotated[ + EvaluatorOutputDefinition, FieldMetadata(alias="outputDefinition") + ] = pydantic.Field() + """ + Structured output schema the evaluator must return. + + Always send `dataType`. + Do not send `version`; it is an internal storage detail and not part of the public request contract. + """ + + model_config_: typing_extensions.Annotated[ + typing.Optional[EvaluatorModelConfig], FieldMetadata(alias="modelConfig") + ] = pydantic.Field(default=None) + """ + Optional explicit model configuration. Omit or set to `null` to use the project default evaluation model. + """ + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/evaluators/types/evaluator.py b/langfuse/api/unstable/evaluators/types/evaluator.py new file mode 100644 index 000000000..8023839fc --- /dev/null +++ b/langfuse/api/unstable/evaluators/types/evaluator.py @@ -0,0 +1,118 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import datetime as dt +import typing + +import pydantic +import typing_extensions +from ....core.pydantic_utilities import UniversalBaseModel +from ....core.serialization import FieldMetadata +from ...commons.types.evaluator_model_config import EvaluatorModelConfig +from ...commons.types.evaluator_scope import EvaluatorScope +from ...commons.types.evaluator_type import EvaluatorType +from ...commons.types.public_evaluator_output_definition import ( + PublicEvaluatorOutputDefinition, +) + + +class Evaluator(UniversalBaseModel): + """ + One evaluator that can be used for scoring. + + An evaluator describes **how** to score data: + - prompt + - extracted prompt variables + - output schema + - optional explicit model configuration + + It does not define **which** live objects are evaluated. That is the job of `evaluation-rules`. + + For agent clients, the most important fields are: + - `variables`: use these exact names when building the evaluation-rule `mapping` array + - `outputDefinition`: tells you the expected score type and the evaluator's response instructions + - `modelConfig`: tells you whether the evaluator uses the project default model (`null`) or an explicit provider/model + + Versioning behavior: + - `GET /evaluators` returns the latest version of each available evaluator. + - `GET /evaluators/{id}` can return an older version. + - Evaluation rules always run against the latest version for the selected evaluator name within the same source (`project` or `managed`). + """ + + id: str = pydantic.Field() + """ + Identifier of this evaluator. + """ + + name: str = pydantic.Field() + """ + Evaluator name. + """ + + version: int = pydantic.Field() + """ + Version number of this evaluator. + """ + + scope: EvaluatorScope = pydantic.Field() + """ + Where this evaluator comes from: your project or Langfuse-managed defaults. + """ + + type: EvaluatorType = pydantic.Field() + """ + Evaluator engine type. Currently always `llm_as_judge`. 
+ """ + + prompt: str = pydantic.Field() + """ + Prompt template used during evaluation. + """ + + variables: typing.List[str] = pydantic.Field() + """ + Variables extracted from the evaluator prompt. + + Every variable in this list must be mapped exactly once when creating an evaluation rule. + """ + + output_definition: typing_extensions.Annotated[ + PublicEvaluatorOutputDefinition, FieldMetadata(alias="outputDefinition") + ] = pydantic.Field() + """ + Structured output schema returned by this evaluator. + + Responses always include `dataType` and omit the internal output-definition `version`. + Use `dataType` to decide how future scores should be interpreted. + """ + + model_config_: typing_extensions.Annotated[ + typing.Optional[EvaluatorModelConfig], FieldMetadata(alias="modelConfig") + ] = pydantic.Field(default=None) + """ + Explicit model configuration, or `null` when the project default evaluation model is used. + """ + + evaluation_rule_count: typing_extensions.Annotated[ + int, FieldMetadata(alias="evaluationRuleCount") + ] = pydantic.Field() + """ + Number of evaluation rules in the project that currently use this evaluator version. + """ + + created_at: typing_extensions.Annotated[ + dt.datetime, FieldMetadata(alias="createdAt") + ] = pydantic.Field() + """ + Timestamp when this evaluator was created. + """ + + updated_at: typing_extensions.Annotated[ + dt.datetime, FieldMetadata(alias="updatedAt") + ] = pydantic.Field() + """ + Timestamp when this evaluator was last updated. + """ + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/evaluators/types/evaluators.py b/langfuse/api/unstable/evaluators/types/evaluators.py new file mode 100644 index 000000000..51247a66e --- /dev/null +++ b/langfuse/api/unstable/evaluators/types/evaluators.py @@ -0,0 +1,17 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +import pydantic +from ....core.pydantic_utilities import UniversalBaseModel +from ....utils.pagination.types.meta_response import MetaResponse +from .evaluator import Evaluator + + +class Evaluators(UniversalBaseModel): + data: typing.List[Evaluator] + meta: MetaResponse + + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + extra="allow", frozen=True + ) diff --git a/langfuse/api/unstable/raw_client.py b/langfuse/api/unstable/raw_client.py new file mode 100644 index 000000000..5201a5119 --- /dev/null +++ b/langfuse/api/unstable/raw_client.py @@ -0,0 +1,13 @@ +# This file was auto-generated by Fern from our API Definition. + +from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper + + +class RawUnstableClient: + def __init__(self, *, client_wrapper: SyncClientWrapper): + self._client_wrapper = client_wrapper + + +class AsyncRawUnstableClient: + def __init__(self, *, client_wrapper: AsyncClientWrapper): + self._client_wrapper = client_wrapper