diff --git a/grafana/postgres/v12/0-health-check.json b/grafana/postgres/v12/0-health-check.json index 8dc7a9d4c..cc1238ab0 100644 --- a/grafana/postgres/v12/0-health-check.json +++ b/grafana/postgres/v12/0-health-check.json @@ -19,7 +19,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 1, + "id": 21, "links": [], "panels": [ { @@ -4968,6 +4968,152 @@ "title": "Max. XMIN horizon age", "type": "stat" }, + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "pgwatch-metrics" + }, + "description": "Percentage of total query execution time spent in JIT. (requires stat_statements_jit metric)", + "fieldConfig": { + "defaults": { + "decimals": 2, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": 0 + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 25 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 6, + "x": 6, + "y": 18 + }, + "id": 50, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^pct$/", + "limit": 1, + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "pgwatch-metrics" + }, + "editorMode": "code", + "format": "table", + "group": [], + "groupBy": [ + { + "params": [ + "365d" + ], + "type": "time" + }, + { + "params": [ + "none" + ], + "type": "fill" + } + ], + "measurement": "stat_statements_jit", + "metricColumn": "none", + "orderByTime": "ASC", + "policy": "default", + "rawQuery": true, + "rawSql": "select\n (sum(total_jit - total_jit_lag) / sum(total_time - total_time_lag)) * 100 as pct\nfrom\n(\n select\n time,\n (data->>'jit_generation_time')::float8 as total_jit, lag((data->>'jit_generation_time')::float8) over w as total_jit_lag,\n (data->>'total_time')::float8 as total_time, lag((data->>'total_time')::float8) over w as total_time_lag\n from\n stat_statements\n where \n dbname = '$dbname' and $__timeFilter(time)\n window w as (order by time)\n)\nwhere total_time > total_time_lag", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [], + "type": "last" + } + ] + ], + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + }, + "tags": [ + { + "key": "dbname", + "operator": "=~", + "value": "/^$dbname$/" + } + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "JIT %", + "type": "stat" + }, { "fieldConfig": { "defaults": {}, @@ -5000,8 +5146,8 @@ "overrides": [] }, "gridPos": { - "h": 5, - "w": 13, + "h": 7, + "w": 12, "x": 0, "y": 20 }, diff --git a/grafana/postgres/v12/3-query-performance-analysis.json b/grafana/postgres/v12/3-query-performance-analysis.json index 22589024a..860868a9f 100644 --- a/grafana/postgres/v12/3-query-performance-analysis.json +++ b/grafana/postgres/v12/3-query-performance-analysis.json @@ -40,7 +40,7 @@ "type": "grafana-postgresql-datasource", "uid": "pgwatch-metrics" }, - "description": "Comprehensive query performance analysis from pg_stat_statements.\n\n**Column Guide:**\n- **Query ID** - Internal PostgreSQL query identifier (click to drill down)\n- **Total Runtime** - Total cumulative execution time\n- **Avg Runtime** - Average execution time per call\n- **Plan Time** - Total time spent planning queries\n- **Avg Plan Time** - Average planning time per call\n- **Calls** - Number of times executed\n- **Rows** - Total rows retrieved/affected\n- **Avg Rows** - Average rows per execution\n- **Shared Hit (MB)** - Buffer cache hits - data found in memory (higher is better)\n- **Shared Read (MB)** - Data read from disk into shared buffers (cache misses)\n- **Shared Written (MB)** - Data written from shared buffers to disk\n- **Temp Read (MB)** - Data read from temporary files (indicates memory pressure)\n- **Temp Written (MB)** - Data written to temporary files during large operations\n- **IO Time** - Time spent on disk I/O operations (requires track_io_timing)\n- **% DB Total** - Percentage of total database execution time\n- **Users** - Database roles that executed this query\n- **Query** - Normalized SQL text with constants replaced by placeholders", + "description": "Comprehensive query performance analysis from pg_stat_statements.\n\n**Column Guide:**\n- **Query ID** - Internal PostgreSQL query identifier (click to drill down)\n- **Total Runtime** - Total cumulative execution time\n- **% DB Total** - Percentage of total database execution time\n- **JIT %** - Percentage of total query execution time spent in JIT\n- **Avg Runtime** - Average execution time per call\n- **Plan Time** - Total time spent planning queries\n- **Avg Plan Time** - Average planning time per call\n- **Calls** - Number of times executed\n- **Rows** - Total rows retrieved/affected\n- **Avg Rows** - Average rows per execution\n- **Shared Hit (MB)** - Buffer cache hits - data found in memory (higher is better)\n- **Shared Read (MB)** - Data read from disk into shared buffers (cache misses)\n- **Shared Written (MB)** - Data written from shared buffers to disk\n- **Temp Read (MB)** - Data read from temporary files (indicates memory pressure)\n- **Temp Written (MB)** - Data written to temporary files during large operations\n- **IO Time** - Time spent on disk I/O operations (requires track_io_timing)\n- **Users** - Database roles that executed this query\n- **Query** - Normalized SQL text with constants replaced by placeholders", "fieldConfig": { "defaults": { "custom": { @@ -351,6 +351,50 @@ "value": 229 } ] + }, + { + "matcher": { + "id": "byName", + "options": "JIT Time" + }, + "properties": [ + { + "id": "unit", + "value": "ms" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "JIT %" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + }, + { + "id": "unit", + "value": "percent" + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Users" + }, + "properties": [ + { + "id": "custom.width", + "value": 0 + } + ] } ] }, @@ -383,8 +427,25 @@ }, "format": "table", "rawQuery": true, - "rawSql": "-- Enhanced comprehensive query analysis (Fast mode)\nWITH query_stats AS (\n select\n tag_data->>'queryid' as queryid,\n max((data->>'total_time')::numeric) - min((data->>'total_time')::numeric) as total_runtime,\n (max((data->>'total_time')::numeric) - min((data->>'total_time')::numeric)) / nullif(max((data->>'calls')::int8) - min((data->>'calls')::int8), 0) as avg_runtime,\n max((data->>'total_plan_time')::numeric) - min((data->>'total_plan_time')::numeric) as plan_time,\n (max((data->>'total_plan_time')::numeric) - min((data->>'total_plan_time')::numeric)) / nullif(max((data->>'calls')::int8) - min((data->>'calls')::int8), 0) as avg_plan_time,\n max((data->>'calls')::int8) - min((data->>'calls')::int8) as calls,\n max((data->>'rows')::int8) - min((data->>'rows')::int8) as rows,\n (max((data->>'rows')::int8) - min((data->>'rows')::int8))::numeric / nullif(max((data->>'calls')::int8) - min((data->>'calls')::int8), 0) as avg_rows,\n (max((data->>'shared_blks_hit')::int8) - min((data->>'shared_blks_hit')::int8)) * 8192 / 1024.0 / 1024.0 as shared_hit_mb,\n (max((data->>'shared_blks_read')::int8) - min((data->>'shared_blks_read')::int8)) * 8192 / 1024.0 / 1024.0 as shared_read_mb,\n (max((data->>'shared_blks_written')::int8) - min((data->>'shared_blks_written')::int8)) * 8192 / 1024.0 / 1024.0 as shared_written_mb,\n (max((data->>'temp_blks_read')::int8) - min((data->>'temp_blks_read')::int8)) * 8192 / 1024.0 / 1024.0 as temp_read_mb,\n (max((data->>'temp_blks_written')::int8) - min((data->>'temp_blks_written')::int8)) * 8192 / 1024.0 / 1024.0 as temp_written_mb,\n (max((data->>'blk_read_time')::numeric) - min((data->>'blk_read_time')::numeric)) + \n (max((data->>'blk_write_time')::numeric) - min((data->>'blk_write_time')::numeric)) as io_time,\n case when length(tag_data->>'query') > 150 then (tag_data->>'query')::varchar(150) || '...' else tag_data->>'query' end as query\n from stat_statements\n where dbname = '$dbname'\n and $__timeFilter(time)\n and tag_data->>'query' ~* '$query_filter_regex'\n group by tag_data->>'queryid', tag_data->>'query'\n having max((data->>'calls')::int8) - min((data->>'calls')::int8) > 0\n),\nwith_percentages AS (\n select \n *,\n 100 * total_runtime / nullif((select sum(total_runtime) from query_stats), 0) as pct_db_total\n from query_stats\n)\nselect\n queryid,\n total_runtime::int8 as \"Total Runtime\",\n avg_runtime::numeric(10,2) as \"Avg Runtime\",\n pct_db_total::numeric(5,1) as \"% DB Total\",\n plan_time::int8 as \"Plan Time\",\n avg_plan_time::numeric(10,2) as \"Avg Plan Time\",\n calls as \"Calls\",\n rows as \"Rows\",\n avg_rows::numeric(10,1) as \"Avg Rows\",\n shared_hit_mb::numeric(10,1) as \"Shared Hit\",\n shared_read_mb::numeric(10,1) as \"Shared Read\",\n shared_written_mb::numeric(10,1) as \"Shared Written\",\n temp_read_mb::numeric(10,1) as \"Temp Read\",\n temp_written_mb::numeric(10,1) as \"Temp Written\",\n io_time::numeric(10,1) as \"IO Time\",\n (select data->>'users' from stat_statements where $__timeFilter(time) and dbname = '$dbname' and tag_data->>'queryid' = with_percentages.queryid order by time desc limit 1) as \"Users\",\n query as \"Query\"\nfrom with_percentages\norder by total_runtime desc nulls last\nlimit $top", - "refId": "A" + "rawSql": "-- Enhanced comprehensive query analysis (Fast mode)\nWITH query_stats AS (\n select\n tag_data->>'queryid' as queryid,\n max((data->>'total_time')::numeric) - min((data->>'total_time')::numeric) as total_runtime,\n (max((data->>'total_time')::numeric) - min((data->>'total_time')::numeric)) / nullif(max((data->>'calls')::int8) - min((data->>'calls')::int8), 0) as avg_runtime,\n max((data->>'total_plan_time')::numeric) - min((data->>'total_plan_time')::numeric) as plan_time,\n (max((data->>'total_plan_time')::numeric) - min((data->>'total_plan_time')::numeric)) / nullif(max((data->>'calls')::int8) - min((data->>'calls')::int8), 0) as avg_plan_time,\n max((data->>'calls')::int8) - min((data->>'calls')::int8) as calls,\n max((data->>'rows')::int8) - min((data->>'rows')::int8) as rows,\n (max((data->>'rows')::int8) - min((data->>'rows')::int8))::numeric / nullif(max((data->>'calls')::int8) - min((data->>'calls')::int8), 0) as avg_rows,\n (max((data->>'shared_blks_hit')::int8) - min((data->>'shared_blks_hit')::int8)) * 8192 / 1024.0 / 1024.0 as shared_hit_mb,\n (max((data->>'shared_blks_read')::int8) - min((data->>'shared_blks_read')::int8)) * 8192 / 1024.0 / 1024.0 as shared_read_mb,\n (max((data->>'shared_blks_written')::int8) - min((data->>'shared_blks_written')::int8)) * 8192 / 1024.0 / 1024.0 as shared_written_mb,\n (max((data->>'temp_blks_read')::int8) - min((data->>'temp_blks_read')::int8)) * 8192 / 1024.0 / 1024.0 as temp_read_mb,\n (max((data->>'temp_blks_written')::int8) - min((data->>'temp_blks_written')::int8)) * 8192 / 1024.0 / 1024.0 as temp_written_mb,\n (max((data->>'blk_read_time')::numeric) - min((data->>'blk_read_time')::numeric)) + \n (max((data->>'blk_write_time')::numeric) - min((data->>'blk_write_time')::numeric)) as io_time,\n (max((data->>'jit_generation_time')::numeric) - min((data->>'jit_generation_time')::numeric)) as jit_time,\n case when length(tag_data->>'query') > 150 then (tag_data->>'query')::varchar(150) || '...' else tag_data->>'query' end as query\n from stat_statements\n where dbname = '$dbname'\n and $__timeFilter(time)\n and tag_data->>'query' ~* '$query_filter_regex'\n group by tag_data->>'queryid', tag_data->>'query'\n having max((data->>'calls')::int8) - min((data->>'calls')::int8) > 0\n),\nwith_percentages AS (\n select \n *,\n 100 * total_runtime / nullif((select sum(total_runtime) from query_stats), 0) as pct_db_total\n from query_stats\n)\nselect\n queryid,\n total_runtime::numeric as \"Total Runtime\",\n pct_db_total::numeric(5,1) as \"% DB Total\",\n case when total_runtime::int8 = 0 then 0 else jit_time::numeric(10, 2) / total_runtime::int8 end as \"JIT %\",\n avg_runtime::numeric(10,2) as \"Avg Runtime\",\n plan_time::numeric as \"Plan Time\",\n avg_plan_time::numeric(10,2) as \"Avg Plan Time\",\n calls as \"Calls\",\n rows as \"Rows\",\n avg_rows::numeric(10,1) as \"Avg Rows\",\n shared_hit_mb::numeric(10,1) as \"Shared Hit\",\n shared_read_mb::numeric(10,1) as \"Shared Read\",\n shared_written_mb::numeric(10,1) as \"Shared Written\",\n temp_read_mb::numeric(10,1) as \"Temp Read\",\n temp_written_mb::numeric(10,1) as \"Temp Written\",\n io_time::numeric(10,1) as \"IO Time\",\n (select data->>'users' from stat_statements where $__timeFilter(time) and dbname = '$dbname' and tag_data->>'queryid' = with_percentages.queryid order by time desc limit 1) as \"Users\",\n query as \"Query\"\nfrom with_percentages\norder by total_runtime desc nulls last\nlimit $top", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } } ], "title": "Enhanced Query Performance Analysis Table", diff --git a/grafana/postgres/v12/global-health.json b/grafana/postgres/v12/global-health.json index dd5acd43b..deac19545 100644 --- a/grafana/postgres/v12/global-health.json +++ b/grafana/postgres/v12/global-health.json @@ -1966,15 +1966,210 @@ ], "type": "table" }, + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "pgwatch-metrics" + }, + "description": "Top $top_limit sources by the percentage of total query execution time spent in JIT. (requires stat_statements metric).", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "pct" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "custom.align" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": 0 + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 2 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 5 + } + ] + } + }, + { + "id": "displayName", + "value": "percent" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "dbname" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "links", + "value": [ + { + "targetBlank": true, + "title": "Go to 'System Stats' dash", + "url": "/d/system-stats?var-dbname=${__value.text}" + } + ] + }, + { + "id": "custom.align" + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 0, + "y": 22 + }, + "id": 17, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "pgwatch-metrics" + }, + "editorMode": "code", + "format": "table", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "select \n dbname, pct\nfrom (\n select \n dbname,\n (sum(total_jit - total_jit_lag) / sum(total_time - total_time_lag)) * 100 as pct\n from\n (\n select\n dbname,\n (data->>'jit_generation_time')::float8 as total_jit, lag((data->>'jit_generation_time')::float8) over w as total_jit_lag,\n (data->>'total_time')::float8 as total_time, lag((data->>'total_time')::float8) over w as total_time_lag\n from stat_statements\n where $__timeFilter(time)\n window w as (order by time)\n )\n where total_time > total_time_lag\n group by 1\n)\nwhere pct > 0\norder by 2 desc\nlimit $top_limit", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + }, + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "Top $top_limit by JIT %", + "transformations": [ + { + "id": "merge", + "options": { + "reducers": [] + } + } + ], + "type": "table" + }, { "fieldConfig": { "defaults": {}, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 8, - "x": 0, + "x": 8, "y": 22 }, "id": 13, diff --git a/internal/metrics/metrics.yaml b/internal/metrics/metrics.yaml index e42bd9ba0..78a6f7cd5 100644 --- a/internal/metrics/metrics.yaml +++ b/internal/metrics/metrics.yaml @@ -2442,10 +2442,6 @@ metrics: WITH /* pgwatch_generated */ q_data AS ( SELECT queryid::text AS tag_queryid, - /* - if security conscious about exposing query texts replace the below expression with a dash ('-') OR - use the stat_statements_no_query_text metric instead, created specifically for this use case. - */ array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, sum(s.calls)::int8 AS calls, round(sum(s.total_exec_time)::numeric, 3)::double precision AS total_time, @@ -2463,6 +2459,11 @@ metrics: sum(wal_fpi)::int8 AS wal_fpi, sum(wal_bytes)::int8 AS wal_bytes, round(sum(s.total_plan_time)::numeric, 3)::double precision AS total_plan_time, + round(sum(s.jit_generation_time)::numeric, 3)::double precision as jit_generation_time, + /* + if security conscious about exposing query texts replace the below expression with a dash ('-') OR + use the stat_statements_no_query_text metric instead, created specifically for this use case. + */ max(query::varchar(8000)) AS query FROM pg_stat_statements s @@ -2509,6 +2510,7 @@ metrics: b.wal_fpi, b.wal_bytes, b.total_plan_time, + b.jit_generation_time, ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' ', 'g')) AS tag_query FROM ( SELECT @@ -2585,15 +2587,24 @@ metrics: temp_blks_written > 0 ORDER BY temp_blks_written DESC - LIMIT 100) a) b + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + jit_generation_time > 0 + ORDER BY + jit_generation_time DESC + LIMIT 100) a) b; 17: |- WITH /* pgwatch_generated */ q_data AS ( SELECT queryid::text AS tag_queryid, - /* - NB! if security conscious about exposing query texts replace the below expression with a dash ('-') OR - use the stat_statements_no_query_text metric instead, created specifically for this use case. - */ array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, sum(s.calls)::int8 AS calls, round(sum(s.total_exec_time)::numeric, 3)::double precision AS total_time, @@ -2611,6 +2622,11 @@ metrics: sum(wal_fpi)::int8 AS wal_fpi, sum(wal_bytes)::int8 AS wal_bytes, round(sum(s.total_plan_time)::numeric, 3)::double precision AS total_plan_time, + round(sum(s.jit_generation_time)::numeric, 3)::double precision as jit_generation_time, + /* + if security conscious about exposing query texts replace the below expression with a dash ('-') OR + use the stat_statements_no_query_text metric instead, created specifically for this use case. + */ max(query::varchar(8000)) AS query FROM pg_stat_statements s @@ -2657,6 +2673,7 @@ metrics: b.wal_fpi, b.wal_bytes, b.total_plan_time, + b.jit_generation_time, ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' ', 'g')) AS tag_query FROM ( SELECT @@ -2733,6 +2750,19 @@ metrics: temp_blks_written > 0 ORDER BY temp_blks_written DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + jit_generation_time > 0 + ORDER BY + jit_generation_time DESC LIMIT 100) a) b; stat_statements_calls: description: >