Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
497 changes: 489 additions & 8 deletions packaging/src/kubernetes/README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,50 @@ spec:
hiveServer2:
description: HiveServer2 component configuration
properties:
# Schema for the optional autoscaling settings of this component.
autoscaling:
  type: object
  description: Autoscaling configuration (requires KEDA installed in the cluster)
  properties:
    activationCpuValue:
      type: string
      description: >-
        CPU average value below which the trigger is inactive.
        Required if targetCpuValue is set.
    cooldownSeconds:
      type: integer
      default: 600
      description: >-
        Cooldown period in seconds after a scaling event before
        another can occur
    enabled:
      type: boolean
      default: false
      description: Whether autoscaling is enabled for this component
    gracePeriodSeconds:
      type: integer
      default: 300
      description: >-
        Maximum time in seconds to wait for graceful drain during
        scale-down before the pod is forcibly terminated
    minReplicas:
      type: integer
      default: 0
      description: >-
        Minimum number of replicas (floor for scale-down). Set to 0 for
        scale-to-zero (HS2 requires KEDA HTTP Add-on for wake-from-zero)
    scaleDownThreshold:
      type: integer
      default: 20
      description: >-
        Threshold that triggers scale-down for Prometheus-based metrics
    scaleUpThreshold:
      type: integer
      default: 80
      description: >-
        Threshold that triggers scale-up (component-specific: sessions
        for HS2, connections for HMS, queue depth for LLAP, pending
        tasks for TezAM)
    targetCpuValue:
      type: string
      description: >-
        Target CPU average value for scaling (e.g., '1500m' or '1').
        If omitted, CPU scaling is disabled.
configOverrides:
additionalProperties:
type: string
Expand Down Expand Up @@ -152,6 +196,50 @@ spec:
llap:
description: LLAP daemon configuration. Enabled by default.
properties:
# Schema for the optional autoscaling settings of this component.
autoscaling:
  type: object
  description: Autoscaling configuration (requires KEDA installed in the cluster)
  properties:
    activationCpuValue:
      type: string
      description: >-
        CPU average value below which the trigger is inactive.
        Required if targetCpuValue is set.
    cooldownSeconds:
      type: integer
      default: 600
      description: >-
        Cooldown period in seconds after a scaling event before
        another can occur
    enabled:
      type: boolean
      default: false
      description: Whether autoscaling is enabled for this component
    gracePeriodSeconds:
      type: integer
      default: 300
      description: >-
        Maximum time in seconds to wait for graceful drain during
        scale-down before the pod is forcibly terminated
    minReplicas:
      type: integer
      default: 0
      description: >-
        Minimum number of replicas (floor for scale-down). Set to 0 for
        scale-to-zero (HS2 requires KEDA HTTP Add-on for wake-from-zero)
    scaleDownThreshold:
      type: integer
      default: 20
      description: >-
        Threshold that triggers scale-down for Prometheus-based metrics
    scaleUpThreshold:
      type: integer
      default: 80
      description: >-
        Threshold that triggers scale-up (component-specific: sessions
        for HS2, connections for HMS, queue depth for LLAP, pending
        tasks for TezAM)
    targetCpuValue:
      type: string
      description: >-
        Target CPU average value for scaling (e.g., '1500m' or '1').
        If omitted, CPU scaling is disabled.
configOverrides:
additionalProperties:
type: string
Expand Down Expand Up @@ -235,6 +323,50 @@ spec:
metastore:
description: Metastore component configuration
properties:
# Schema for the optional autoscaling settings of this component.
autoscaling:
  type: object
  description: Autoscaling configuration (requires KEDA installed in the cluster)
  properties:
    activationCpuValue:
      type: string
      description: >-
        CPU average value below which the trigger is inactive.
        Required if targetCpuValue is set.
    cooldownSeconds:
      type: integer
      default: 600
      description: >-
        Cooldown period in seconds after a scaling event before
        another can occur
    enabled:
      type: boolean
      default: false
      description: Whether autoscaling is enabled for this component
    gracePeriodSeconds:
      type: integer
      default: 300
      description: >-
        Maximum time in seconds to wait for graceful drain during
        scale-down before the pod is forcibly terminated
    minReplicas:
      type: integer
      default: 0
      description: >-
        Minimum number of replicas (floor for scale-down). Set to 0 for
        scale-to-zero (HS2 requires KEDA HTTP Add-on for wake-from-zero)
    scaleDownThreshold:
      type: integer
      default: 20
      description: >-
        Threshold that triggers scale-down for Prometheus-based metrics
    scaleUpThreshold:
      type: integer
      default: 80
      description: >-
        Threshold that triggers scale-up (component-specific: sessions
        for HS2, connections for HMS, queue depth for LLAP, pending
        tasks for TezAM)
    targetCpuValue:
      type: string
      description: >-
        Target CPU average value for scaling (e.g., '1500m' or '1').
        If omitted, CPU scaling is disabled.
configOverrides:
additionalProperties:
type: string
Expand Down Expand Up @@ -371,6 +503,50 @@ spec:
tezAm:
description: Tez Application Master configuration. Enabled by default.
properties:
# Schema for the optional autoscaling settings of this component.
autoscaling:
  type: object
  description: Autoscaling configuration (requires KEDA installed in the cluster)
  properties:
    activationCpuValue:
      type: string
      description: >-
        CPU average value below which the trigger is inactive.
        Required if targetCpuValue is set.
    cooldownSeconds:
      type: integer
      default: 600
      description: >-
        Cooldown period in seconds after a scaling event before
        another can occur
    enabled:
      type: boolean
      default: false
      description: Whether autoscaling is enabled for this component
    gracePeriodSeconds:
      type: integer
      default: 300
      description: >-
        Maximum time in seconds to wait for graceful drain during
        scale-down before the pod is forcibly terminated
    minReplicas:
      type: integer
      default: 0
      description: >-
        Minimum number of replicas (floor for scale-down). Set to 0 for
        scale-to-zero (HS2 requires KEDA HTTP Add-on for wake-from-zero)
    scaleDownThreshold:
      type: integer
      default: 20
      description: >-
        Threshold that triggers scale-down for Prometheus-based metrics
    scaleUpThreshold:
      type: integer
      default: 80
      description: >-
        Threshold that triggers scale-up (component-specific: sessions
        for HS2, connections for HMS, queue depth for LLAP, pending
        tasks for TezAM)
    targetCpuValue:
      type: string
      description: >-
        Target CPU average value for scaling (e.g., '1500m' or '1').
        If omitted, CPU scaling is disabled.
configOverrides:
additionalProperties:
type: string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,15 @@ rules:
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "list", "watch"]
# Full lifecycle management of PodDisruptionBudgets (used for graceful autoscaling).
- apiGroups:
    - policy
  resources:
    - poddisruptionbudgets
  verbs:
    - get
    - list
    - watch
    - create
    - update
    - patch
    - delete
# Full lifecycle management of the KEDA objects that drive autoscaling.
- apiGroups:
    - keda.sh
  resources:
    - scaledobjects
    - triggerauthentications
  verbs:
    - get
    - list
    - watch
    - create
    - update
    - patch
    - delete
# KEDA HTTP Add-on objects, needed for scale-to-zero (wake-from-zero on an HTTP request).
- apiGroups:
    - http.keda.sh
  resources:
    - httpscaledobjects
    - interceptorroutes
  verbs:
    - get
    - list
    - watch
    - create
    - update
    - patch
    - delete
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,21 @@ spec:
extraVolumeMounts:
{{- toYaml .Values.cluster.metastore.extraVolumeMounts | nindent 6 }}
{{- end }}
{{- /*
  Render the metastore autoscaling stanza only when the values map exists and
  has enabled set. `with` skips the body for a missing/null map, so `.enabled`
  is never dereferenced on nil; Go templates before 1.18 do not short-circuit
  `and`, which made the previous `and X X.enabled` guard fail on a null map.
*/}}
{{- with .Values.cluster.metastore.autoscaling }}
{{- if .enabled }}
autoscaling:
  enabled: true
  minReplicas: {{ .minReplicas }}
  scaleUpThreshold: {{ .scaleUpThreshold }}
  scaleDownThreshold: {{ .scaleDownThreshold }}
  {{- /* CPU settings are optional: the keys are omitted entirely when unset. */}}
  {{- if .targetCpuValue }}
  targetCpuValue: {{ .targetCpuValue | quote }}
  {{- end }}
  {{- if .activationCpuValue }}
  activationCpuValue: {{ .activationCpuValue | quote }}
  {{- end }}
  cooldownSeconds: {{ .cooldownSeconds }}
  gracePeriodSeconds: {{ .gracePeriodSeconds }}
{{- end }}
{{- end }}
{{- else }}
{{- if .Values.cluster.metastore.externalUri }}
externalUri: {{ .Values.cluster.metastore.externalUri | quote }}
Expand Down Expand Up @@ -96,6 +111,21 @@ spec:
extraVolumeMounts:
{{- toYaml .Values.cluster.hiveServer2.extraVolumeMounts | nindent 6 }}
{{- end }}
{{- /*
  Render the HiveServer2 autoscaling stanza only when the values map exists and
  has enabled set. `with` skips the body for a missing/null map, so `.enabled`
  is never dereferenced on nil; Go templates before 1.18 do not short-circuit
  `and`, which made the previous `and X X.enabled` guard fail on a null map.
*/}}
{{- with .Values.cluster.hiveServer2.autoscaling }}
{{- if .enabled }}
autoscaling:
  enabled: true
  minReplicas: {{ .minReplicas }}
  scaleUpThreshold: {{ .scaleUpThreshold }}
  scaleDownThreshold: {{ .scaleDownThreshold }}
  {{- /* CPU settings are optional: the keys are omitted entirely when unset. */}}
  {{- if .targetCpuValue }}
  targetCpuValue: {{ .targetCpuValue | quote }}
  {{- end }}
  {{- if .activationCpuValue }}
  activationCpuValue: {{ .activationCpuValue | quote }}
  {{- end }}
  cooldownSeconds: {{ .cooldownSeconds }}
  gracePeriodSeconds: {{ .gracePeriodSeconds }}
{{- end }}
{{- end }}

llap:
enabled: {{ .Values.cluster.llap.enabled }}
Expand All @@ -120,6 +150,15 @@ spec:
extraVolumeMounts:
{{- toYaml .Values.cluster.llap.extraVolumeMounts | nindent 6 }}
{{- end }}
{{- /*
  Render the LLAP autoscaling stanza only when the values map exists and has
  enabled set. `with` skips the body for a missing/null map, so `.enabled` is
  never dereferenced on nil; Go templates before 1.18 do not short-circuit
  `and`, which made the previous `and X X.enabled` guard fail on a null map.
*/}}
{{- with .Values.cluster.llap.autoscaling }}
{{- if .enabled }}
autoscaling:
  enabled: true
  minReplicas: {{ .minReplicas }}
  scaleUpThreshold: {{ .scaleUpThreshold }}
  scaleDownThreshold: {{ .scaleDownThreshold }}
  {{- /*
    Optional CPU settings, passed through exactly as for the other components
    so values a user sets here are not silently dropped. Nothing is emitted
    when they are unset, so default rendering is unchanged.
  */}}
  {{- if .targetCpuValue }}
  targetCpuValue: {{ .targetCpuValue | quote }}
  {{- end }}
  {{- if .activationCpuValue }}
  activationCpuValue: {{ .activationCpuValue | quote }}
  {{- end }}
  cooldownSeconds: {{ .cooldownSeconds }}
  gracePeriodSeconds: {{ .gracePeriodSeconds }}
{{- end }}
{{- end }}
{{- end }}

tezAm:
Expand All @@ -146,6 +185,21 @@ spec:
extraVolumeMounts:
{{- toYaml .Values.cluster.tezAm.extraVolumeMounts | nindent 6 }}
{{- end }}
{{- /*
  Render the Tez AM autoscaling stanza only when the values map exists and has
  enabled set. `with` skips the body for a missing/null map, so `.enabled` is
  never dereferenced on nil; Go templates before 1.18 do not short-circuit
  `and`, which made the previous `and X X.enabled` guard fail on a null map.
*/}}
{{- with .Values.cluster.tezAm.autoscaling }}
{{- if .enabled }}
autoscaling:
  enabled: true
  minReplicas: {{ .minReplicas }}
  scaleUpThreshold: {{ .scaleUpThreshold }}
  scaleDownThreshold: {{ .scaleDownThreshold }}
  {{- /* CPU settings are optional: the keys are omitted entirely when unset. */}}
  {{- if .targetCpuValue }}
  targetCpuValue: {{ .targetCpuValue | quote }}
  {{- end }}
  {{- if .activationCpuValue }}
  activationCpuValue: {{ .activationCpuValue | quote }}
  {{- end }}
  cooldownSeconds: {{ .cooldownSeconds }}
  gracePeriodSeconds: {{ .gracePeriodSeconds }}
{{- end }}
{{- end }}
{{- end }}

zookeeper:
Expand Down
46 changes: 46 additions & 0 deletions packaging/src/kubernetes/helm/hive-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,17 @@ cluster:
configOverrides: {}
extraVolumes: []
extraVolumeMounts: []
# Metastore autoscaling. The cluster must have KEDA and Prometheus installed.
# While enabled, the 'replicas' value above is the upper bound on replicas.
autoscaling:
  enabled: false
  minReplicas: 1
  # Timing, in seconds.
  cooldownSeconds: 300
  gracePeriodSeconds: 60
  # Scaling thresholds.
  scaleUpThreshold: 75
  scaleDownThreshold: 30
  # Optional CPU-based scaling (AverageValue); uncomment to enable.
  # targetCpuValue: "750m"
  # The CPU trigger is inactive below this value.
  # activationCpuValue: "200m"
# Set to use an external Metastore instead of deploying one:
# enabled: false
# externalUri: "thrift://external-metastore:9083"
Expand All @@ -127,6 +138,18 @@ cluster:
externalJars: []
extraVolumes: []
extraVolumeMounts: []
# HiveServer2 autoscaling. The cluster must have KEDA, Prometheus and the
# KEDA HTTP Add-on installed.
# minReplicas: 0 turns on scale-to-zero; a beeline HTTP connect then wakes HS2
# through the KEDA HTTP interceptor.
# While enabled, the 'replicas' value above is the upper bound on replicas.
autoscaling:
  enabled: false
  minReplicas: 0
  # Timing, in seconds.
  cooldownSeconds: 600
  gracePeriodSeconds: 300
  # Scaling thresholds.
  scaleUpThreshold: 80
  scaleDownThreshold: 20
  # Optional CPU-based scaling (AverageValue); uncomment to enable.
  # targetCpuValue: "1600m"
  # The CPU trigger is inactive below this value.
  # activationCpuValue: "400m"

# ---------------------------------------------------------------------------
# LLAP — enabled by default for full-HA
Expand All @@ -141,6 +164,16 @@ cluster:
configOverrides: {}
extraVolumes: []
extraVolumeMounts: []
# LLAP autoscaling. The cluster must have KEDA and Prometheus installed.
# minReplicas: 0 turns on scale-to-zero; LLAP scales up immediately when
# queries need it.
# While enabled, the 'replicas' value above is the upper bound on replicas.
autoscaling:
  enabled: false
  minReplicas: 0
  # Timing, in seconds.
  cooldownSeconds: 900
  gracePeriodSeconds: 600
  # Scaling thresholds.
  scaleUpThreshold: 1
  scaleDownThreshold: 0

# ---------------------------------------------------------------------------
# TEZ AM — enabled by default for full-HA
Expand All @@ -154,3 +187,16 @@ cluster:
configOverrides: {}
extraVolumes: []
extraVolumeMounts: []
# Tez AM autoscaling. The cluster must have KEDA and Prometheus installed.
# minReplicas: 0 turns on scale-to-zero; Tez AM wakes when HS2 receives queries.
# While enabled, the 'replicas' value above is the upper bound on replicas.
autoscaling:
  enabled: false
  minReplicas: 0
  # Timing, in seconds.
  cooldownSeconds: 600
  gracePeriodSeconds: 120
  # scaleUpThreshold counts pending tasks per AM (e.g., 5 = scale when 5+
  # tasks are waiting).
  # NOTE(review): scaleDownThreshold (10) is higher than scaleUpThreshold (5),
  # unlike every other component in this file - confirm this is intended.
  scaleUpThreshold: 5
  scaleDownThreshold: 10
  # Optional CPU-based scaling (AverageValue); uncomment to enable.
  # targetCpuValue: "600m"
  # The CPU trigger is inactive below this value.
  # activationCpuValue: "100m"
Loading
Loading