From 7043d6e195129bb5e3c0364e6ea7255a85c095d4 Mon Sep 17 00:00:00 2001
From: Shreyas Goenka
Date: Mon, 8 Dec 2025 02:10:45 +0100
Subject: [PATCH] Add example for alerts in YAML

---
 knowledge_base/alerts/README.md               | 31 ++++++++++++
 knowledge_base/alerts/databricks.yml          | 21 ++++++++
 .../nyc_taxi_daily_revenue.alert.yml          | 50 +++++++++++++++++++
 3 files changed, 102 insertions(+)
 create mode 100644 knowledge_base/alerts/README.md
 create mode 100644 knowledge_base/alerts/databricks.yml
 create mode 100644 knowledge_base/alerts/resources/nyc_taxi_daily_revenue.alert.yml

diff --git a/knowledge_base/alerts/README.md b/knowledge_base/alerts/README.md
new file mode 100644
index 0000000..900c978
--- /dev/null
+++ b/knowledge_base/alerts/README.md
@@ -0,0 +1,31 @@
+# SQL Alerts with Databricks Asset Bundles
+
+This example shows how to define SQL alerts using Databricks Asset Bundles. The alert monitors daily NYC Taxi revenue and triggers when it exceeds a threshold.
+
+For more information about SQL alerts, see the [Databricks documentation](https://docs.databricks.com/aws/en/sql/user/alerts/).
+
+## Usage
+
+1. Modify `databricks.yml`:
+   - Update the `host` field to your Databricks workspace URL
+   - Update the `warehouse` field to the name of your SQL warehouse
+
+2. Modify `resources/nyc_taxi_daily_revenue.alert.yml`:
+   - Update the `user_name` field under `permissions` to your email address
+
+3. Deploy the alert:
+   ```sh
+   databricks bundle deploy
+   ```
+
+## Key Configuration
+
+The alert configuration in `resources/nyc_taxi_daily_revenue.alert.yml` includes:
+
+- **query_text**: SQL query that returns the metric to monitor
+- **evaluation**: Defines how to evaluate the query results
+  - `comparison_operator`: GREATER_THAN, LESS_THAN, EQUAL, etc.
+  - `source.aggregation`: MAX, MIN, AVG, or SUM
+  - `threshold.value`: The value to compare against
+- **schedule**: Uses Quartz cron syntax (e.g., `"0 0 8 * * ?"` for daily at 8 AM)
+- **warehouse_id**: The SQL warehouse to execute the query
diff --git a/knowledge_base/alerts/databricks.yml b/knowledge_base/alerts/databricks.yml
new file mode 100644
index 0000000..d055bf1
--- /dev/null
+++ b/knowledge_base/alerts/databricks.yml
@@ -0,0 +1,21 @@
+bundle:
+  name: alerts
+
+include:
+  - resources/*.yml
+
+variables:
+  # The "warehouse_id" variable is used to reference the warehouse used by the alert.
+  warehouse_id:
+    lookup:
+      # Replace this with the name of your SQL warehouse.
+      warehouse: "Shared Unity Catalog Serverless"
+
+workspace:
+  host: https://myworkspace.databricks.com
+
+
+targets:
+  dev:
+    default: true
+    mode: development
diff --git a/knowledge_base/alerts/resources/nyc_taxi_daily_revenue.alert.yml b/knowledge_base/alerts/resources/nyc_taxi_daily_revenue.alert.yml
new file mode 100644
index 0000000..64c119b
--- /dev/null
+++ b/knowledge_base/alerts/resources/nyc_taxi_daily_revenue.alert.yml
@@ -0,0 +1,50 @@
+resources:
+  alerts:
+    nyc_taxi_daily_revenue:
+      permissions:
+        - level: CAN_MANAGE
+          user_name: user@company.com
+
+      custom_summary: "Alert when NYC Taxi daily revenue exceeds threshold"
+      display_name: "NYC Taxi Daily Revenue Alert"
+
+      # The SQL query that the alert will evaluate
+      query_text: |
+        SELECT
+          to_date(tpep_pickup_datetime) as date,
+          SUM(fare_amount) as amount
+        FROM
+          `samples`.`nyctaxi`.`trips`
+        GROUP BY
+          ALL
+        ORDER BY
+          1 DESC
+
+      # The warehouse to use for running the query
+      warehouse_id: ${var.warehouse_id}
+
+      evaluation:
+        # Comparison operator for the threshold
+        comparison_operator: "GREATER_THAN"
+
+        # Notification settings
+        notification:
+          notify_on_ok: false
+          retrigger_seconds: 3600 # Re-trigger after 1 hour if condition persists
+
+        # Source defines which column and aggregation to evaluate
+        source:
+          aggregation: "MAX"
+          display: "amount"
+          name: "amount"
+
+        # Threshold to compare against
+        threshold:
+          value:
+            double_value: 1000000.0 # Alert if daily revenue exceeds $1M
+
+      # Schedule for running the alert
+      schedule:
+        pause_status: "UNPAUSED"
+        quartz_cron_schedule: "0 0 8 * * ?" # Run daily at 8 AM UTC
+        timezone_id: "UTC"