fix(llmobs): prevent dots in evaluation metric labels (#15297)

aniszoubiramar · web-flow · commit 5bcd099739c3 · 2025-11-20T11:25:25.000+01:00
## Description
EVP interprets dots in a label as nested-object separators (e.g., "field.1" becomes {field: {1: value}}), which causes confusion for customers submitting custom evaluations with dots in label names. This change adds validation that rejects labels containing dots and raises a clear error message directing users to use alternative naming conventions.

## Testing
Added a unit test to verify the expected behavior.

## Risks
None

## Additional Notes
JIRA ticket: https://datadoghq.atlassian.net/browse/MLOB-4557
diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
@@ -1713,6 +1713,10 @@ def submit_evaluation(
                 error = "invalid_metric_label"
                 raise ValueError("label must be the specified name of the evaluation metric.")
 
+            if "." in label:
+                error = "invalid_label_value"
+                raise ValueError("label value must not contain a '.'.")
+
             metric_type = metric_type.lower()
             if metric_type not in ("categorical", "score", "boolean"):
                 error = "invalid_metric_type"
diff --git a/releasenotes/notes/fix-prevent-dots-in-evaluation-metric-labels-ab5caab19d52d3e1.yaml b/releasenotes/notes/fix-prevent-dots-in-evaluation-metric-labels-ab5caab19d52d3e1.yaml
@@ -0,0 +1,4 @@
+fixes:
+  - |
+    LLM Observability: This fix resolves an issue where evaluation-metric labels containing dots could be interpreted as nested objects. Evaluation submission now validates labels, rejecting any label that contains a dot with a clear error message instructing users to use alternative naming conventions.
+
diff --git a/tests/llmobs/test_llmobs_service.py b/tests/llmobs/test_llmobs_service.py
@@ -1630,6 +1630,13 @@ def test_submit_evaluation_empty_label_raises_error(llmobs, mock_llmobs_logs):
         )
 
 
+def test_submit_evaluation_label_value_with_a_period_raises_error(llmobs, mock_llmobs_logs):
+    with pytest.raises(ValueError, match="label value must not contain a '.'."):
+        llmobs.submit_evaluation(
+            span={"span_id": "123", "trace_id": "456"}, label="toxicity.0", metric_type="categorical", value="high"
+        )
+
+
 def test_submit_evaluation_incorrect_metric_type_raises_error(llmobs, mock_llmobs_logs):
     with pytest.raises(ValueError, match="metric_type must be one of 'categorical', 'score', or 'boolean'."):
         llmobs.submit_evaluation(

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +fixes:
 +  - |
 +    LLM Observability: This fix resolves an issue where evaluation-metric labels containing dots could be interpreted as nested objects. Evaluation submission now validates labels, rejecting any label that contains a dot with a clear error message instructing users to use alternative naming conventions.
++