From 57ccbb04ab5139cd2a60eeac37d2501724f907d5 Mon Sep 17 00:00:00 2001 From: Joost Boonzajer Flaes Date: Fri, 13 Mar 2026 09:07:59 +0200 Subject: [PATCH 1/3] fix: show per-dimension alert descriptions when multiple dimensions fail When a dimension_anomalies (or volume_anomalies with dimensions) test detects failures across multiple dimension values, the alert description previously only showed the last row's description. Now it shows each failing dimension's details individually (up to 5), or a count summary with a sample of dimension values when more than 5 dimensions fail. Co-Authored-By: Claude Sonnet 4.6 --- .../tests/test_dimension_anomalies.py | 73 +++++++++++++++++++ .../store_anomaly_test_results.sql | 28 +++++-- 2 files changed, 94 insertions(+), 7 deletions(-) diff --git a/integration_tests/tests/test_dimension_anomalies.py b/integration_tests/tests/test_dimension_anomalies.py index 51d7a05b0..9e0e9275a 100644 --- a/integration_tests/tests/test_dimension_anomalies.py +++ b/integration_tests/tests/test_dimension_anomalies.py @@ -315,3 +315,76 @@ def test_anomaly_in_detection_period( ) assert test_result["status"] == expected_status + + +def test_dimension_anomalies_alert_description_few_failures( + test_id: str, dbt_project: DbtProject +): + """When ≤5 dimension values fail, description shows each one's anomaly details.""" + utc_today = datetime.utcnow().date() + test_date, *training_dates = generate_dates(base_date=utc_today - timedelta(1)) + + # 3 dimension values all spike on test_date (training: 1/day, test: 10/day) + anomalous_dimensions = ["Batman", "Superman", "Spiderman"] + + data: List[Dict[str, Any]] = [ + {TIMESTAMP_COLUMN: test_date.strftime(DATE_FORMAT), "superhero": hero} + for hero in anomalous_dimensions + for _ in range(10) + ] + data += [ + {TIMESTAMP_COLUMN: cur_date.strftime(DATE_FORMAT), "superhero": hero} + for cur_date in training_dates + for hero in anomalous_dimensions + ] + + test_args = { + "timestamp_column": TIMESTAMP_COLUMN, + "dimensions": ["superhero"], + "sensitivity": 2, + } + test_result = dbt_project.test(test_id, DBT_TEST_NAME, test_args, data=data) + assert test_result["status"] == "fail" + + description = test_result["test_results_description"] + # Each failing dimension value should appear in the description + for hero in anomalous_dimensions: + assert hero in description, f"Expected '{hero}' in description: {description}" + # Should NOT show the high-volume summary message + assert "dimension values are anomalous" not in description + + +def test_dimension_anomalies_alert_description_many_failures( + test_id: str, dbt_project: DbtProject +): + """When >5 dimension values fail, description shows a count summary.""" + utc_today = datetime.utcnow().date() + test_date, *training_dates = generate_dates(base_date=utc_today - timedelta(1)) + + # 6 dimension values all spike on test_date (>5 threshold) + anomalous_dimensions = ["Batman", "Superman", "Spiderman", "IronMan", "Thor", "Hulk"] + + data: List[Dict[str, Any]] = [ + {TIMESTAMP_COLUMN: test_date.strftime(DATE_FORMAT), "superhero": hero} + for hero in anomalous_dimensions + for _ in range(10) + ] + data += [ + {TIMESTAMP_COLUMN: cur_date.strftime(DATE_FORMAT), "superhero": hero} + for cur_date in training_dates + for hero in anomalous_dimensions + ] + + test_args = { + "timestamp_column": TIMESTAMP_COLUMN, + "dimensions": ["superhero"], + "sensitivity": 2, + } + test_result = dbt_project.test(test_id, DBT_TEST_NAME, test_args, data=data) + assert test_result["status"] == "fail" + + description = test_result["test_results_description"] + # Should show the count summary for many failures + assert "dimension values are anomalous" in description, ( + f"Expected summary message in description: {description}" + ) diff --git a/macros/edr/data_monitoring/anomaly_detection/store_anomaly_test_results.sql b/macros/edr/data_monitoring/anomaly_detection/store_anomaly_test_results.sql index ac62f3d4a..6d173a580 100644 --- a/macros/edr/data_monitoring/anomaly_detection/store_anomaly_test_results.sql +++ b/macros/edr/data_monitoring/anomaly_detection/store_anomaly_test_results.sql @@ -74,23 +74,37 @@ and upper(column_name) = upper({{ elementary.const_as_string(column_name) }}) {%- endif %} {%- endset -%} - {% set test_results_description %} - {% if rows_with_score %} - {{ elementary.insensitive_get_dict_value(rows_with_score[-1], 'anomaly_description') }} - {% else %} - Not enough data to calculate anomaly score. - {% endif %} - {% endset %} {% set failures = namespace(data=0) %} {% set filtered_anomaly_scores_rows = [] %} + {% set anomalous_rows = [] %} {% for row in anomaly_scores_rows %} {% if row.anomaly_score is not none %} {% do filtered_anomaly_scores_rows.append(row) %} {% if row.is_anomalous %} {% set failures.data = failures.data + 1 %} + {% do anomalous_rows.append(row) %} {% endif %} {% endif %} {% endfor %} + {%- set max_dimension_alerts = 5 -%} + {% set test_results_description %} + {%- if rows_with_score -%} + {%- set sample_row = rows_with_score[-1] -%} + {%- set row_dimension = elementary.insensitive_get_dict_value(sample_row, "dimension") -%} + {%- if row_dimension is not none and anomalous_rows | length > 0 -%} + {%- if anomalous_rows | length > max_dimension_alerts -%} + {%- set remaining = (anomalous_rows | length) - max_dimension_alerts -%} + {{ anomalous_rows | length }} dimension values are anomalous. Showing first {{ max_dimension_alerts }}: {% for row in anomalous_rows[:max_dimension_alerts] %}{{ elementary.insensitive_get_dict_value(row, "dimension_value") }}{% if not loop.last %}, {% endif %}{% endfor %}, and {{ remaining }} more. + {%- else -%} + {% for row in anomalous_rows %}{{ elementary.insensitive_get_dict_value(row, "anomaly_description") }}{% if not loop.last %} | {% endif %}{% endfor %} + {%- endif -%} + {%- else -%} + {{ elementary.insensitive_get_dict_value(rows_with_score[-1], "anomaly_description") }} + {%- endif -%} + {%- else -%} + Not enough data to calculate anomaly score. + {%- endif -%} + {% endset %} {% set test_result_dict = { "id": elementary.insensitive_get_dict_value(latest_row, "id"), "data_issue_id": elementary.insensitive_get_dict_value( From 0eecf7d3b35b27452f0775a8dbbafba36e4a8fbb Mon Sep 17 00:00:00 2001 From: Joost Boonzajer Flaes Date: Sun, 15 Mar 2026 10:20:08 +0200 Subject: [PATCH 2/3] fix: black formatting and Vertica Decimal special-value handling - Reformat anomalous_dimensions list to satisfy Black 88-char line limit - Fix TypeError in adapter_query_runner when Vertica returns Decimal special values (Infinity/NaN): as_tuple().exponent is a string ('F'/'n') for those cases, not an int, causing '>= not supported between str and int' Co-Authored-By: Claude Sonnet 4.6 --- integration_tests/tests/adapter_query_runner.py | 9 +++++++-- integration_tests/tests/test_dimension_anomalies.py | 9 ++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/integration_tests/tests/adapter_query_runner.py b/integration_tests/tests/adapter_query_runner.py index 6ac9d96ff..c45cbb83a 100644 --- a/integration_tests/tests/adapter_query_runner.py +++ b/integration_tests/tests/adapter_query_runner.py @@ -52,9 +52,14 @@ def _serialize_value(val: Any) -> Any: * Everything else is returned unchanged. """ if isinstance(val, Decimal): - # Match the Jinja macro: normalize, then int or float + # Match the Jinja macro: normalize, then int or float. + # Note: for special values (Infinity, NaN), as_tuple().exponent is a + # string ('F' or 'n'), not an int — convert those directly to float. normalized = val.normalize() - if normalized.as_tuple().exponent >= 0: + exponent = normalized.as_tuple().exponent + if isinstance(exponent, str): + return float(normalized) + if exponent >= 0: return int(normalized) return float(normalized) if isinstance(val, (datetime, date, time)): diff --git a/integration_tests/tests/test_dimension_anomalies.py b/integration_tests/tests/test_dimension_anomalies.py index 9e0e9275a..7e9909020 100644 --- a/integration_tests/tests/test_dimension_anomalies.py +++ b/integration_tests/tests/test_dimension_anomalies.py @@ -362,7 +362,14 @@ def test_dimension_anomalies_alert_description_many_failures( test_date, *training_dates = generate_dates(base_date=utc_today - timedelta(1)) # 6 dimension values all spike on test_date (>5 threshold) - anomalous_dimensions = ["Batman", "Superman", "Spiderman", "IronMan", "Thor", "Hulk"] + anomalous_dimensions = [ + "Batman", + "Superman", + "Spiderman", + "IronMan", + "Thor", + "Hulk", + ] data: List[Dict[str, Any]] = [ {TIMESTAMP_COLUMN: test_date.strftime(DATE_FORMAT), "superhero": hero} From 892db6041100d783090da4fa2db304240f05aa7a Mon Sep 17 00:00:00 2001 From: Joost Boonzajer Flaes Date: Sun, 15 Mar 2026 11:13:52 +0200 Subject: [PATCH 3/3] fix: apply black 22.12.0 formatting to test assertion Co-Authored-By: Claude Sonnet 4.6 --- integration_tests/tests/test_dimension_anomalies.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/integration_tests/tests/test_dimension_anomalies.py b/integration_tests/tests/test_dimension_anomalies.py index 7e9909020..c68fc62fe 100644 --- a/integration_tests/tests/test_dimension_anomalies.py +++ b/integration_tests/tests/test_dimension_anomalies.py @@ -392,6 +392,6 @@ def test_dimension_anomalies_alert_description_many_failures( description = test_result["test_results_description"] # Should show the count summary for many failures - assert "dimension values are anomalous" in description, ( - f"Expected summary message in description: {description}" - ) + assert ( + "dimension values are anomalous" in description + ), f"Expected summary message in description: {description}"