1+ ---
12# Use something like this to check for metrics:
23# count by (app_kubernetes_io_name, app_kubernetes_io_instance, pod) ({app_kubernetes_io_name!="",pod!=""})
34#
45# Products metrics covered by the ServiceMonitors below. The list also includes whether the
56# ServiceMonitor scrapes native metrics or a statsd/JMX exporter.
67#
7- # [x] Airflow - exporter
8- # [x] Druid - native
9- # [x] HBase - native
10- # [x] Hadoop HDFS - native
11- # [x] Hive - exporter
12- # [x] Kafka - exporter
13- # [x] NiFi 1 - native
14- # [x] NiFi 2 - native
15- # [x] OpenSearch - native
16- # [ ] Spark - native - partially done, see comment on it below
17- # [x] Superset - exporter
18- # [x] Trino - native
19- # [x] ZooKeeper - native
20- # [x] OPA - native
21- ---
8+ #
9+ # Utilize `prometheus.io/scheme`, `prometheus.io/port`, `prometheus.io/path` (and optionally `prometheus.io/clusterdomain`)
10+ # annotations set by the operators to scrape all Stackable products.
11+ #
12+ # [x] Airflow - relabel drop filter on airflow container
13+ # [x] Druid
14+ # [x] HBase
15+ # [x] Hadoop HDFS - relabel drop filter on empty container
16+ # [x] Hive
17+ # [x] Kafka - TODO: verify whether the listener services expose metrics
18+ # [x] NiFi 1 + 2
19+ # [x] OpenSearch
20+ # [x] Spark: Connect, HistoryServer
21+ # [x] Superset - relabel drop filter on superset container
22+ # [x] Trino
23+ # [x] ZooKeeper
24+ # [x] OPA
2225apiVersion : monitoring.coreos.com/v1
2326kind : ServiceMonitor
2427metadata :
@@ -33,218 +36,47 @@ spec:
3336 matchLabels :
3437 stackable.tech/vendor : Stackable
3538 prometheus.io/scrape : " true"
36- matchExpressions :
37- - key : app.kubernetes.io/name
38- operator : In
39- values :
40- - airflow
41- - druid
42- - hive
43- - nifi # This only works for NiFi 1, NiFi 2 has a special ServiceMonitor below
44- - opa
45- - superset
46- - trino
47- endpoints :
48- - scheme : http
49- port : metrics
50- path : /metrics
51- podTargetLabels :
52- - app.kubernetes.io/name
53- - app.kubernetes.io/instance
54- - app.kubernetes.io/component
55- - app.kubernetes.io/role-group
56- - app.kubernetes.io/version
57- ---
58- apiVersion : monitoring.coreos.com/v1
59- kind : ServiceMonitor
60- metadata :
61- name : stackable-native-metrics
62- labels :
63- stackable.tech/vendor : Stackable
64- release : prometheus
65- spec :
66- namespaceSelector :
67- any : true
68- selector :
69- matchLabels :
70- stackable.tech/vendor : Stackable
71- prometheus.io/scrape : " true"
72- matchExpressions :
73- - key : app.kubernetes.io/name
74- operator : In
75- values :
76- - zookeeper
77- endpoints :
78- - scheme : http
79- port : native-metrics
80- path : /metrics
81- podTargetLabels :
82- - app.kubernetes.io/name
83- - app.kubernetes.io/instance
84- - app.kubernetes.io/component
85- - app.kubernetes.io/role-group
86- - app.kubernetes.io/version
87- ---
88- # Kafka is special in that the operator totally messes up services:
89- # 1. The metrics Service is missing
90- # 2. The role level simple-kafka-broker-default has the prometheus.io/scrape label, but exposes no ports...
91- # 3. The role level simple-kafka-broker-default is labeled with app.kubernetes.io/name: listener???
92- # So we have a dedicated config for it
93- apiVersion : monitoring.coreos.com/v1
94- kind : ServiceMonitor
95- metadata :
96- name : stackable-kafka
97- labels :
98- stackable.tech/vendor : Stackable
99- release : prometheus
100- spec :
101- namespaceSelector :
102- any : true
103- selector :
104- matchLabels :
105- stackable.tech/vendor : Stackable
106- app.kubernetes.io/name : listener # Dafuq?
107- app.kubernetes.io/component : broker # We need to filter on brokers instead, as the app.kubernetes.io/name is messed up
108- endpoints :
109- - scheme : http
110- port : metrics
111- path : /metrics
112- podTargetLabels :
113- - app.kubernetes.io/name
114- - app.kubernetes.io/instance
115- - app.kubernetes.io/component
116- - app.kubernetes.io/role-group
117- - app.kubernetes.io/version
118- ---
119- # We prefer the native metrics over the statsd-exporter
120- apiVersion : monitoring.coreos.com/v1
121- kind : ServiceMonitor
122- metadata :
123- name : stackable-hdfs
124- labels :
125- stackable.tech/vendor : Stackable
126- release : prometheus
127- spec :
128- namespaceSelector :
129- any : true
130- selector :
131- matchLabels :
132- stackable.tech/vendor : Stackable
133- prometheus.io/scrape : " true"
134- app.kubernetes.io/name : hdfs
135- endpoints :
136- - scheme : http
137- port : http # Don't use the "metrics" exporter port, we want native metrics instead
138- path : /prom
139- podTargetLabels :
140- - app.kubernetes.io/name
141- - app.kubernetes.io/instance
142- - app.kubernetes.io/component
143- - app.kubernetes.io/role-group
144- - app.kubernetes.io/version
145- ---
146- apiVersion : monitoring.coreos.com/v1
147- kind : ServiceMonitor
148- metadata :
149- name : stackable-hbase
150- labels :
151- stackable.tech/vendor : Stackable
152- release : prometheus
153- spec :
154- namespaceSelector :
155- any : true
156- selector :
157- matchLabels :
158- stackable.tech/vendor : Stackable
159- prometheus.io/scrape : " true"
160- app.kubernetes.io/name : hbase
161- endpoints :
162- - scheme : http
163- port : metrics
164- path : /prometheus
165- podTargetLabels :
166- - app.kubernetes.io/name
167- - app.kubernetes.io/instance
168- - app.kubernetes.io/component
169- - app.kubernetes.io/role-group
170- - app.kubernetes.io/version
171- ---
172- apiVersion : monitoring.coreos.com/v1
173- kind : ServiceMonitor
174- metadata :
175- name : stackable-opensearch
176- labels :
177- stackable.tech/vendor : Stackable
178- release : prometheus
179- spec :
180- namespaceSelector :
181- any : true
182- selector :
183- matchLabels :
184- stackable.tech/vendor : Stackable
185- prometheus.io/scrape : " true"
186- app.kubernetes.io/name : opensearch
18739 endpoints :
18840 - relabelings :
41+ - sourceLabels :
42+ - __meta_kubernetes_pod_container_name
43+ # Pods show up twice due to multiple containers; we only keep the main / product container.
44+ # Except for Airflow and Superset, where we choose the metrics container.
45+ # - airflow: airflow
46+ # - superset: superset
47+ # - empty: filter when container label does not exist: hdfs
48+ regex : ^(airflow|superset|)$
49+ action : drop
50+ # Add an empty label if it does not exist, or pass through the existing value
51+ - sourceLabels :
52+ - __meta_kubernetes_service_annotation_prometheus_io_clusterdomain
53+ targetLabel : __tmp_clusterdomain__
54+ replacement : $1
55+ # Use default value if empty
56+ - sourceLabels :
57+ - __tmp_clusterdomain__
58+ targetLabel : __tmp_clusterdomain__
59+ regex : ^$
60+ replacement : " cluster.local"
61+ # Scheme and path extraction
18962 - sourceLabels :
19063 - __meta_kubernetes_service_annotation_prometheus_io_scheme
191- action : replace
19264 targetLabel : __scheme__
19365 regex : (https?)
19466 - sourceLabels :
19567 - __meta_kubernetes_service_annotation_prometheus_io_path
196- action : replace
19768 targetLabel : __metrics_path__
19869 regex : (.+)
199- # Use the FQDN instead of the IP address because the IP address
200- # is not contained in the certificate.
70+ # Build metrics service address
20171 - sourceLabels :
202- - __meta_kubernetes_pod_name
20372 - __meta_kubernetes_service_name
20473 - __meta_kubernetes_namespace
74+ - __tmp_clusterdomain__
20575 - __meta_kubernetes_service_annotation_prometheus_io_port
206- action : replace
20776 targetLabel : __address__
20877 regex : (.+);(.+);(.+);(\d+)
209- replacement : $1.$2.$3.svc.cluster.local:$4
210- tlsConfig :
211- ca :
212- secret :
213- name : prometheus-tls-certificate
214- key : ca.crt
215- podTargetLabels :
216- - app.kubernetes.io/name
217- - app.kubernetes.io/instance
218- - app.kubernetes.io/component
219- - app.kubernetes.io/role-group
220- - app.kubernetes.io/version
221- ---
222- # NiFI 2 is a beast on it's own...
223- # We need to use mTLS (otherwise we get a 401) and can not use the PodIP
224- apiVersion : monitoring.coreos.com/v1
225- kind : ServiceMonitor
226- metadata :
227- name : stackable-nifi-2
228- labels :
229- stackable.tech/vendor : Stackable
230- release : prometheus
231- spec :
232- namespaceSelector :
233- any : true
234- selector :
235- matchLabels :
236- stackable.tech/vendor : Stackable
237- prometheus.io/scrape : " true"
238- matchExpressions :
239- - key : app.kubernetes.io/name
240- operator : In
241- values :
242- - nifi
243- endpoints :
244- - scheme : https
245- port : https
246- path : /nifi-api/flow/metrics/prometheus
247- # See https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#monitoring.coreos.com/v1.TLSConfig
78+ # <service-name>.<namespace>.svc.<cluster-domain>:<port>
79+ replacement : $1.$2.svc.$3:$4
24880 tlsConfig :
24981 ca :
25082 secret :
@@ -257,28 +89,15 @@ spec:
25789 keySecret :
25890 name : prometheus-tls-certificate
25991 key : tls.key
260- # We need to talk to the Pod via the FQDN of the Pod because of the stupid SNI check of NiFi.
261- # We can not use the typical PodIP, as it is not contained in the NiFi certificate,
262- # see https://github.com/stackabletech/secret-operator/issues/620
263- relabelings :
264- - sourceLabels :
265- - __meta_kubernetes_pod_name
266- - __meta_kubernetes_service_name
267- - __meta_kubernetes_namespace
268- - __meta_kubernetes_pod_container_port_number
269- targetLabel : __address__
270- replacement : ${1}.${2}-headless.${3}.svc.cluster.local:${4}
271- regex : (.+);(.+?)(?:-metrics)?;(.+);(.+)
27292 podTargetLabels :
27393 - app.kubernetes.io/name
27494 - app.kubernetes.io/instance
27595 - app.kubernetes.io/component
27696 - app.kubernetes.io/role-group
27797 - app.kubernetes.io/version
27898---
279- # spark-k8s-operator does not deploy any Services at all (at least for SparkApplications).
28099# We currently only scrape the driver, going forward we might want to scrape the executors as well.
281- # In the future we might also want to scrape SparkConnect and HistoryServers .
100+ # SparkConnect and HistoryServers are scraped via the `stackable` ServiceMonitor.
282101apiVersion : monitoring.coreos.com/v1
283102kind : PodMonitor
284103metadata :
@@ -308,26 +127,6 @@ spec:
308127---
309128apiVersion : monitoring.coreos.com/v1
310129kind : ServiceMonitor
311- metadata :
312- name : stackable-minio-http
313- labels :
314- stackable.tech/vendor : Stackable
315- release : prometheus
316- spec :
317- namespaceSelector :
318- any : true
319- selector :
320- matchLabels :
321- # stackable.tech/vendor: Stackable # This is not always set, e.g. missing in the nifi-kafka-druid-water-level-data demo
322- app : minio
323- monitoring : " true"
324- endpoints :
325- - scheme : http
326- port : http
327- path : /minio/v2/metrics/cluster
328- ---
329- apiVersion : monitoring.coreos.com/v1
330- kind : ServiceMonitor
331130metadata :
332131 name : stackable-minio-https
333132 labels :
0 commit comments