From 7043d6e195129bb5e3c0364e6ea7255a85c095d4 Mon Sep 17 00:00:00 2001
From: Shreyas Goenka
Date: Mon, 8 Dec 2025 02:10:45 +0100
Subject: [PATCH 1/2] Add example for alerts in YAML

---
 knowledge_base/alerts/README.md               | 31 ++++++++++++
 knowledge_base/alerts/databricks.yml          | 21 ++++++++
 .../nyc_taxi_daily_revenue.alert.yml          | 50 +++++++++++++++++++
 3 files changed, 102 insertions(+)
 create mode 100644 knowledge_base/alerts/README.md
 create mode 100644 knowledge_base/alerts/databricks.yml
 create mode 100644 knowledge_base/alerts/resources/nyc_taxi_daily_revenue.alert.yml

diff --git a/knowledge_base/alerts/README.md b/knowledge_base/alerts/README.md
new file mode 100644
index 0000000..900c978
--- /dev/null
+++ b/knowledge_base/alerts/README.md
@@ -0,0 +1,31 @@
+# SQL Alerts with Databricks Asset Bundles
+
+This example shows how to define SQL alerts using Databricks Asset Bundles. The alert monitors daily NYC Taxi revenue and triggers when it exceeds a threshold.
+
+For more information about SQL alerts, see the [Databricks documentation](https://docs.databricks.com/aws/en/sql/user/alerts/).
+
+## Usage
+
+1. Modify `databricks.yml`:
+   - Update the `host` field to your Databricks workspace URL
+   - Update the `warehouse` field to the name of your SQL warehouse
+
+2. Modify `resources/nyc_taxi_daily_revenue.alert.yml`:
+   - Update the `user_name` field under `permissions` to your email address
+
+3. Deploy the alert:
+   ```sh
+   databricks bundle deploy
+   ```
+
+## Key Configuration
+
+The alert configuration in `resources/nyc_taxi_daily_revenue.alert.yml` includes:
+
+- **query_text**: SQL query that returns the metric to monitor
+- **evaluation**: Defines how to evaluate the query results
+  - `comparison_operator`: GREATER_THAN, LESS_THAN, EQUAL, etc.
+  - `source.aggregation`: MAX, MIN, AVG, or SUM
+  - `threshold.value`: The value to compare against
+- **schedule**: Uses Quartz cron syntax (e.g., `"0 0 8 * * ?"` for daily at 8 AM)
+- **warehouse_id**: The SQL warehouse to execute the query
diff --git a/knowledge_base/alerts/databricks.yml b/knowledge_base/alerts/databricks.yml
new file mode 100644
index 0000000..d055bf1
--- /dev/null
+++ b/knowledge_base/alerts/databricks.yml
@@ -0,0 +1,21 @@
+bundle:
+  name: alerts
+
+include:
+  - resources/*.yml
+
+variables:
+  # The "warehouse_id" variable is used to reference the warehouse used by the alert.
+  warehouse_id:
+    lookup:
+      # Replace this with the name of your SQL warehouse.
+      warehouse: "Shared Unity Catalog Serverless"
+
+workspace:
+  host: https://myworkspace.databricks.com
+
+
+targets:
+  dev:
+    default: true
+    mode: development
diff --git a/knowledge_base/alerts/resources/nyc_taxi_daily_revenue.alert.yml b/knowledge_base/alerts/resources/nyc_taxi_daily_revenue.alert.yml
new file mode 100644
index 0000000..64c119b
--- /dev/null
+++ b/knowledge_base/alerts/resources/nyc_taxi_daily_revenue.alert.yml
@@ -0,0 +1,50 @@
+resources:
+  alerts:
+    nyc_taxi_daily_revenue:
+      permissions:
+        - level: CAN_MANAGE
+          user_name: user@company.com
+
+      custom_summary: "Alert when NYC Taxi daily revenue exceeds threshold"
+      display_name: "NYC Taxi Daily Revenue Alert"
+
+      # The SQL query that the alert will evaluate
+      query_text: |
+        SELECT
+          to_date(tpep_pickup_datetime) as date,
+          SUM(fare_amount) as amount
+        FROM
+          `samples`.`nyctaxi`.`trips`
+        GROUP BY
+          ALL
+        ORDER BY
+          1 DESC
+
+      # The warehouse to use for running the query
+      warehouse_id: ${var.warehouse_id}
+
+      evaluation:
+        # Comparison operator for the threshold
+        comparison_operator: "GREATER_THAN"
+
+        # Notification settings
+        notification:
+          notify_on_ok: false
+          retrigger_seconds: 3600 # Re-trigger after 1 hour if condition persists
+
+        # Source defines which column and aggregation to evaluate
+        source:
+          aggregation: "MAX"
+          display: "amount"
+          name: "amount"
+
+        # Threshold to compare against
+        threshold:
+          value:
+            double_value: 1000000.0 # Alert if daily revenue exceeds $1M
+
+      # Schedule for running the alert
+      schedule:
+        pause_status: "UNPAUSED"
+        quartz_cron_schedule: "0 0 8 * * ?" # Run daily at 8 AM UTC
+        timezone_id: "UTC"

From d4a4275ce37bb3efcf43e3154f21bb47c24d036b Mon Sep 17 00:00:00 2001
From: Shreyas Goenka
Date: Wed, 10 Dec 2025 04:08:40 +0100
Subject: [PATCH 2/2] Add .dbalert.json example for alerts

---
 .../resources/high_value_orders.alert.yml     | 10 +++++
 .../resources/high_value_orders.dbalert.json  | 43 +++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 knowledge_base/alerts/resources/high_value_orders.alert.yml
 create mode 100644 knowledge_base/alerts/resources/high_value_orders.dbalert.json

diff --git a/knowledge_base/alerts/resources/high_value_orders.alert.yml b/knowledge_base/alerts/resources/high_value_orders.alert.yml
new file mode 100644
index 0000000..bef4924
--- /dev/null
+++ b/knowledge_base/alerts/resources/high_value_orders.alert.yml
@@ -0,0 +1,10 @@
+resources:
+  alerts:
+    high_value_orders:
+      permissions:
+        - level: CAN_MANAGE
+          user_name: user@company.com
+
+      warehouse_id: ${var.warehouse_id}
+      display_name: "High Value Urgent Orders Alert"
+      file_path: ./high_value_orders.dbalert.json
diff --git a/knowledge_base/alerts/resources/high_value_orders.dbalert.json b/knowledge_base/alerts/resources/high_value_orders.dbalert.json
new file mode 100644
index 0000000..35a8f77
--- /dev/null
+++ b/knowledge_base/alerts/resources/high_value_orders.dbalert.json
@@ -0,0 +1,43 @@
+{
+  "custom_summary": "Alert when high-value urgent orders are detected",
+  "custom_description_lines": [
+    "This alert monitors urgent orders (priority 1-URGENT) in the TPC-H dataset",
+    "and triggers when the total order value exceeds a threshold.",
+    "Use this to track potentially important customer orders that need immediate attention."
+  ],
+  "evaluation": {
+    "comparison_operator": "GREATER_THAN",
+    "notification": {
+      "notify_on_ok": false,
+      "retrigger_seconds": 7200
+    },
+    "source": {
+      "aggregation": "MAX",
+      "display": "total_urgent_value",
+      "name": "total_urgent_value"
+    },
+    "threshold": {
+      "value": {
+        "double_value": 500000.0
+      }
+    }
+  },
+  "query_lines": [
+    "SELECT",
+    " o_orderpriority,",
+    " COUNT(*) as order_count,",
+    " SUM(o_totalprice) as total_urgent_value",
+    "FROM",
+    " samples.tpch.orders",
+    "WHERE",
+    " o_orderpriority = '1-URGENT'",
+    " AND o_orderdate >= CURRENT_DATE() - INTERVAL 7 DAYS",
+    "GROUP BY",
+    " o_orderpriority"
+  ],
+  "schedule": {
+    "pause_status": "UNPAUSED",
+    "quartz_cron_schedule": "0 0 */6 * * ?",
+    "timezone_id": "UTC"
+  }
+}