From b7d87ba100c222b98e2ddffd0392d4b026bcc110 Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Mon, 25 May 2026 15:58:29 +0530 Subject: [PATCH 1/4] K8s: Add AutoScaling --- packaging/src/kubernetes/README.md | 473 +++++++++++++++++- .../crds/hiveclusters.hive.apache.org-v1.yml | 152 ++++++ .../hive-operator/templates/clusterrole.yaml | 12 + .../hive-operator/templates/hivecluster.yaml | 36 ++ .../kubernetes/helm/hive-operator/values.yaml | 41 ++ .../dependent/HiveDependentResource.java | 139 +++++ .../HiveGenericDependentResource.java | 73 +++ .../HiveServer2DeploymentDependent.java | 104 +++- .../HiveServer2HttpScaledObjectDependent.java | 129 +++++ .../dependent/HiveServer2PdbDependent.java | 62 +++ .../HiveServer2ScaledObjectDependent.java | 149 ++++++ .../HiveServer2ServiceDependent.java | 8 + .../operator/dependent/LlapPdbDependent.java | 62 +++ .../dependent/LlapScaledObjectDependent.java | 158 ++++++ .../dependent/LlapStatefulSetDependent.java | 96 +++- .../MetastoreDeploymentDependent.java | 87 +++- .../dependent/MetastorePdbDependent.java | 62 +++ .../MetastoreScaledObjectDependent.java | 152 ++++++ .../operator/dependent/TezAmPdbDependent.java | 61 +++ .../dependent/TezAmScaledObjectDependent.java | 200 ++++++++ .../dependent/TezAmStatefulSetDependent.java | 73 ++- .../HiveServer2AutoscalingCondition.java | 41 ++ .../HiveServer2MetricScalingCondition.java | 44 ++ .../condition/HiveServer2Precondition.java | 9 +- .../HiveServer2ScaleToZeroCondition.java | 44 ++ .../condition/LlapAutoscalingCondition.java | 42 ++ .../MetastoreAutoscalingCondition.java | 42 ++ .../condition/MetastoreReadyCondition.java | 9 +- .../condition/TezAmAutoscalingCondition.java | 42 ++ .../operator/model/spec/AutoscalingSpec.java | 62 +++ .../operator/model/spec/HiveServer2Spec.java | 6 +- .../operator/model/spec/LlapSpec.java | 6 +- .../operator/model/spec/MetastoreSpec.java | 6 +- .../operator/model/spec/TezAmSpec.java | 6 +- .../reconciler/HiveClusterReconciler.java | 63 ++- 
.../kubernetes/operator/util/ConfigUtils.java | 20 + .../operator/util/HiveConfigBuilder.java | 22 + 37 files changed, 2745 insertions(+), 48 deletions(-) create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveGenericDependentResource.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2HttpScaledObjectDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2PdbDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ScaledObjectDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapPdbDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapScaledObjectDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastorePdbDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreScaledObjectDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmPdbDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmScaledObjectDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2AutoscalingCondition.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2MetricScalingCondition.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2ScaleToZeroCondition.java create mode 100644 
packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapAutoscalingCondition.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreAutoscalingCondition.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmAutoscalingCondition.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/AutoscalingSpec.java diff --git a/packaging/src/kubernetes/README.md b/packaging/src/kubernetes/README.md index 1fc11623240c..e9ccc62d949a 100644 --- a/packaging/src/kubernetes/README.md +++ b/packaging/src/kubernetes/README.md @@ -505,19 +505,449 @@ kubectl get hiveclusters kubectl describe hivecluster hive ``` +--- + +## Autoscaling + +The operator supports metric-based autoscaling for all four Hive components using +[KEDA](https://keda.sh/) ScaledObjects and Kubernetes-native HPA. Autoscaling is +opt-in per component and designed for **zero query failures** during scale-down. + +### Prerequisites + +- [KEDA](https://keda.sh/) installed in the cluster +- [Prometheus](https://prometheus.io/) scraping Hive pod metrics (for HS2, HMS, LLAP custom metrics) +- Kubernetes metrics-server (for CPU-based triggers on Tez AM) +- [KEDA HTTP Add-on](https://github.com/kedacore/http-add-on) — **required for `minReplicas: 0`**, enables automatic wake-from-zero for HS2 + +### Installing KEDA + +KEDA must be installed **before** enabling autoscaling on any Hive component. +The operator creates KEDA `ScaledObject` custom resources which require the KEDA +CRDs to be present on the cluster. 
+ +```bash +# Add the KEDA Helm repo +helm repo add kedacore https://kedacore.github.io/charts +helm install keda kedacore/keda --namespace keda --create-namespace --wait +``` + +Verify KEDA is running: + +```bash +kubectl get pods -n keda +# Expected: keda-operator, keda-metrics-apiserver, keda-admission-webhooks +kubectl get crd | grep keda +# Expected: scaledobjects.keda.sh, scaledjobs.keda.sh, triggerauthentications.keda.sh, etc. +``` + +**For HS2 scale-to-zero** (`minReplicas: 0`), install the KEDA HTTP Add-on: + +```bash +helm install http-add-on kedacore/keda-add-ons-http \ + --namespace keda --wait +``` + +Verify the interceptor is running: + +```bash +kubectl get pods -n keda -l app=keda-add-ons-http-interceptor-proxy +# Expected: keda-add-ons-http-interceptor-proxy-... Running +``` + +> **Note:** The HTTP Add-on is required when `minReplicas: 0`. It places an interceptor +> proxy in the traffic path that detects incoming requests when HS2 has zero pods, +> automatically scaling HS2 up and holding the request until a pod is ready. + +**For Prometheus-based triggers** (HS2, HMS, LLAP), install Prometheus: + +```bash +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm install prometheus prometheus-community/prometheus \ + --namespace monitoring --create-namespace --wait +``` + +> **Note:** If autoscaling is enabled in the HiveCluster spec but KEDA is not +> installed, the operator will fail to reconcile with errors like +> `"Could not find the metadata for the given apiVersion and kind"`. +> Always install KEDA before setting `autoscaling.enabled: true`. + +### Graceful Scale-Down Architecture + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ Scale Down Flow │ +├─────────────────────────────────────────────────────────────────────┤ +│ 1. KEDA reduces desired replicas (cooldown elapsed, metric below │ +│ threshold) │ +│ 2. 
PodDisruptionBudget ensures minAvailable=1 (at least one pod │ +│ always running) │ +│ 3. Kubernetes sends SIGTERM to selected pod │ +│ 4. preStop hook runs: │ +│ - HS2: deregisters from ZK, drains open sessions │ +│ - HMS: sleeps 30s for in-flight Thrift RPCs │ +│ - LLAP: waits until all executors become idle │ +│ - TezAM: waits for current DAG completion │ +│ 5. terminationGracePeriodSeconds = gracePeriodSeconds (safety net) │ +│ 6. Pod terminates only after drain completes │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +### Per-Component Scaling Logic + +| Component | Scale-Up Trigger | Scale-Down Trigger | Cooldown | Native Metric | +|-----------|-----------------|-------------------|----------|---------------| +| **HiveServer2** | `hs2_active_sessions` > 80% of max **OR** CPU > 75% | `hs2_open_sessions` < 20% **AND** CPU < 30% | 10 min | `hs2_open_sessions`, `hs2_active_sessions` | +| **Metastore** | `api_get_partitions` rate spike **OR** CPU > 75% (2 min) | CPU < 30% **AND** API rate flat | 5 min | `api_get_partitions`, `open_connections` | +| **LLAP** | `NumQueuedRequests` > 0 for 1 min | `NumExecutorsAvailable == NumExecutors` (idle) | 15 min | `NumQueuedRequests`, `NumExecutorsAvailable` | +| **Tez AM** (with CPU resources) | Pod CPU > 60% (pool is busy) | Pod CPU < 10% (pool is idle) | 10 min | Standard K8s CPU | +| **Tez AM** (without CPU resources) | `tez_session_pending_tasks` > threshold | No pending tasks for cooldown | 10 min | `tez_session_pending_tasks` | + +### Scale-to-Zero Architecture + +When `minReplicas: 0` is configured (default for HS2, LLAP, TezAM), the cluster +scales down to zero pods when completely idle: + +``` + Scale-to-Zero (Idle Detection) + + 1. No active sessions/queries for cooldownPeriod seconds + → KEDA detects all triggers inactive + → scales HS2 to 0 (idleReplicaCount) + + 2. 
LLAP/TezAM ScaledObjects see hs2_open_sessions = 0 + → activation triggers inactive for cooldownPeriod + → scale LLAP and TezAM to 0 + + 3. HMS stays at minReplicas=1 (always available) + +``` + +``` + Wake-from-Zero (with KEDA HTTP Add-on) + + 1. Beeline connects → KEDA HTTP interceptor proxy queues the + request and triggers HS2 scale-up (0 → 1) + + 2. HS2 pod starts, reports hs2_open_sessions > 0 to Prometheus + + 3. KEDA detects cross-component activation trigger: + - LLAP ScaledObject sees hs2_open_sessions > 0 → scales up + - TezAM ScaledObject sees hs2_open_sessions > 0 → scales up + + 4. Query executes once LLAP/TezAM pods are ready + +``` + +> **Important:** Automatic wake-from-zero requires the KEDA HTTP Add-on. Traffic +> must flow through the interceptor proxy (via Ingress or port-forward). Without the +> HTTP Add-on, HS2 must be manually woken (`kubectl scale deployment/hive-hiveserver2 --replicas=1`). +> LLAP and TezAM wake automatically once HS2 reports active sessions. See +> [Connect to HiveServer2 > Connecting with Scale-to-Zero](#connecting-with-scale-to-zero-minreplicas--0) +> for setup instructions. 
+ +**Component-specific behavior:** + +| Component | minReplicas | Scale-to-Zero Trigger | Wake Trigger | +|-----------|-------------|----------------------|--------------| +| **HS2** | 0 | `hs2_active_sessions = 0` for cooldown | HTTP request via KEDA interceptor (or manual) | +| **HMS** | 1 | Never (always running) | N/A | +| **LLAP** | 0 | `hs2_open_sessions = 0` for cooldown | `hs2_open_sessions > 0` (cross-component) | +| **TezAM** | 0 | `hs2_open_sessions = 0` + no pending tasks | `hs2_open_sessions > 0` (cross-component) | + +### Enabling Autoscaling + +**CLI (with Ozone storage backend):** + +```bash +helm install hive ./helm/hive-operator \ + --set cluster.database.type=postgres \ + --set cluster.database.url="jdbc:postgresql://postgres-postgresql:5432/metastore" \ + --set cluster.database.driver="org.postgresql.Driver" \ + --set cluster.database.username=hive \ + --set cluster.database.passwordSecretRef.name=hive-db-secret \ + --set cluster.database.passwordSecretRef.key=password \ + --set cluster.database.driverJarUrl="https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" \ + --set cluster.zookeeper.quorum="zookeeper:2181" \ + --set cluster.storage.coreSiteOverrides."fs\.defaultFS"="s3a://hive" \ + --set cluster.storage.coreSiteOverrides."fs\.s3a\.endpoint"="http://ozone-s3g-rest:9878" \ + --set-string cluster.storage.coreSiteOverrides."fs\.s3a\.path\.style\.access"=true \ + --set 'cluster.storage.envVars[0].name=HADOOP_OPTIONAL_TOOLS' \ + --set 'cluster.storage.envVars[0].value=hadoop-aws' \ + --set 'cluster.storage.envVars[1].name=AWS_ACCESS_KEY_ID' \ + --set 'cluster.storage.envVars[1].value=ozone' \ + --set 'cluster.storage.envVars[2].name=AWS_SECRET_ACCESS_KEY' \ + --set 'cluster.storage.envVars[2].value=ozone' \ + --set cluster.hiveServer2.autoscaling.enabled=true \ + --set cluster.hiveServer2.autoscaling.minReplicas=0 \ + --set cluster.hiveServer2.autoscaling.scaleUpThreshold=80 \ + --set 
cluster.hiveServer2.autoscaling.cooldownSeconds=600 \ + --set cluster.hiveServer2.autoscaling.gracePeriodSeconds=300 \ + --set cluster.metastore.autoscaling.enabled=true \ + --set cluster.metastore.autoscaling.minReplicas=1 \ + --set cluster.metastore.autoscaling.cooldownSeconds=300 \ + --set cluster.metastore.autoscaling.gracePeriodSeconds=60 \ + --set cluster.llap.autoscaling.enabled=true \ + --set cluster.llap.autoscaling.minReplicas=0 \ + --set cluster.llap.autoscaling.cooldownSeconds=900 \ + --set cluster.llap.autoscaling.gracePeriodSeconds=600 \ + --set cluster.tezAm.autoscaling.enabled=true \ + --set cluster.tezAm.autoscaling.minReplicas=0 \ + --set cluster.tezAm.autoscaling.scaleUpThreshold=5 \ + --set cluster.tezAm.autoscaling.cooldownSeconds=600 \ + --set cluster.tezAm.autoscaling.gracePeriodSeconds=120 +``` + +**Values file:** + +```yaml +# values-autoscaling.yaml +cluster: + database: + type: postgres + url: "jdbc:postgresql://postgres-postgresql:5432/metastore" + driver: "org.postgresql.Driver" + username: hive + passwordSecretRef: + name: hive-db-secret + key: password + driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + + zookeeper: + quorum: "zookeeper:2181" + + storage: + coreSiteOverrides: + fs.defaultFS: "s3a://hive" + fs.s3a.endpoint: "http://ozone-s3g-rest:9878" + fs.s3a.path.style.access: "true" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + value: "ozone" + - name: AWS_SECRET_ACCESS_KEY + value: "ozone" + + hiveServer2: + replicas: 10 # Acts as max replicas when autoscaling is enabled + resources: + requestsCpu: "1" # Required for CPU-based autoscaling trigger + requestsMemory: "2Gi" + autoscaling: + enabled: true + minReplicas: 0 # Scale to zero when idle + scaleUpThreshold: 80 # Requests/sec that triggers additional pods + cooldownSeconds: 600 # 10 min before scaling back to 0 + gracePeriodSeconds: 300 + + metastore: + replicas: 6 # Acts as 
max replicas when autoscaling is enabled + resources: + requestsCpu: "500m" # Required for CPU-based autoscaling trigger + requestsMemory: "1Gi" + autoscaling: + enabled: true + minReplicas: 1 # HMS must always be available + cooldownSeconds: 300 + gracePeriodSeconds: 60 + + llap: + replicas: 8 # Acts as max replicas when autoscaling is enabled + autoscaling: + enabled: true + minReplicas: 0 # Scale to zero when no queries need LLAP + cooldownSeconds: 900 # 15 min — scaling down destroys in-memory cache + gracePeriodSeconds: 600 + + tezAm: + replicas: 10 # Acts as max replicas when autoscaling is enabled + resources: + requestsCpu: "500m" # Required for CPU-based autoscaling trigger + requestsMemory: "1Gi" + autoscaling: + enabled: true + minReplicas: 0 # Scale to zero when no queries running + scaleUpThreshold: 60 # CPU% when resources set; pending tasks per AM otherwise + scaleDownThreshold: 10 + cooldownSeconds: 600 + gracePeriodSeconds: 120 +``` + +```bash +helm install hive ./helm/hive-operator -f values-autoscaling.yaml +``` + +When autoscaling is enabled, the operator automatically: +- Deploys the Prometheus JMX Exporter agent sidecar (port 9404, `/metrics`) +- Enables `hive.server2.metrics.enabled` / `metastore.metrics.enabled` (JMX reporter) +- Adds Prometheus scrape annotations to pods +- Creates KEDA ScaledObjects with the configured thresholds +- Creates PodDisruptionBudgets (minAvailable: 1) +- Configures preStop lifecycle hooks for graceful drain +- Sets `terminationGracePeriodSeconds` to the configured grace period +- Adds cross-component activation triggers for LLAP/TezAM (wake when HS2 has open sessions) + +**Exported Prometheus Metrics (per component):** + +| Component | Metrics | Purpose | +|-----------|---------|---------| +| **HiveServer2** | `hs2_open_sessions`, `hs2_active_sessions`, `hs2_active_calls_*`, `tez_session_pending_tasks`, `tez_session_running_tasks`, `tez_session_task_backlog_ratio` | Session/query load, Tez AM demand | +| 
**Metastore** | `api_*_total`, `hive_metastore_open_connections` | API call rates, connection count | +| **LLAP** | `hadoop_llapdaemon_executornumqueuedrequests`, `hadoop_llapdaemon_*` | Executor queue depth, daemon health | +| **Tez AM** | `tez_am_*` | DAG execution metrics | + +### CPU-Based Scaling and Resource Requests + +The operator includes a **CPU utilization trigger** in the ScaledObject for HS2, Metastore, +and Tez AM. KEDA's CPU trigger uses the `Utilization` metric type, which is defined as a +percentage of the container's CPU request. This means **the container must have a CPU request +defined** for the trigger to work. + +If you enable autoscaling without setting `resources` for that component, the operator +will omit the CPU trigger and rely solely on the Prometheus-based trigger. For Tez AM +specifically, without CPU resources the operator uses `tez_session_pending_tasks` (queued +tasks waiting for AM slots) as the proportional scaler — this reflects real query demand +rather than connection count, avoiding spurious scale-ups from idle or zombie sessions. + +To get both Prometheus and CPU-based scaling, set `resources` on the component: + +```yaml +cluster: + hiveServer2: + resources: + requestsCpu: "1" # Required for CPU-based autoscaling + requestsMemory: "2Gi" + autoscaling: + enabled: true + + metastore: + resources: + requestsCpu: "500m" # Required for CPU-based autoscaling + requestsMemory: "1Gi" + autoscaling: + enabled: true + + tezAm: + resources: + requestsCpu: "500m" # Required for CPU-based autoscaling + requestsMemory: "1Gi" + autoscaling: + enabled: true +``` + +> **Note:** LLAP scaling uses only Prometheus triggers (`NumQueuedRequests`) +> and does not include a CPU trigger, so LLAP does not require `resources` to +> be set for autoscaling to work. 
+ +### Helm Values Reference (Autoscaling) + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.<component>.replicas` | `1-2` | Static replica count, or max replicas ceiling when autoscaling is enabled | +| `cluster.<component>.autoscaling.enabled` | `false` | Enable KEDA-based autoscaling | +| `cluster.<component>.autoscaling.minReplicas` | `0` (HS2/LLAP/TezAM), `1` (HMS) | Minimum replica count. Set to 0 for scale-to-zero | +| `cluster.<component>.autoscaling.scaleUpThreshold` | varies | Metric threshold triggering scale-up | +| `cluster.<component>.autoscaling.scaleDownThreshold` | varies | Metric threshold triggering scale-down | +| `cluster.<component>.autoscaling.cooldownSeconds` | varies | Cooldown after a scaling event | +| `cluster.<component>.autoscaling.gracePeriodSeconds` | varies | Max drain time before forced termination | + +--- + ## Connect to HiveServer2 +HiveServer2 runs in **HTTP transport mode** by default (recommended for Kubernetes +environments as it works well with load balancers, ingress controllers, and proxies). + +### Standard Connection (minReplicas >= 1) + +When HS2 always has at least one pod running, connect directly to the service: + ```bash -kubectl exec -it deployment/hive-hiveserver2 -- beeline -u "jdbc:hive2://hive-hiveserver2:10000/" +kubectl exec -it deployment/hive-hiveserver2 -- beeline -u "jdbc:hive2://hive-hiveserver2:10001/;transportMode=http;httpPath=cliservice" ``` Or via port-forward: ```bash -kubectl port-forward svc/hive-hiveserver2 10000:10000 -beeline -u "jdbc:hive2://localhost:10000/" +kubectl port-forward svc/hive-hiveserver2 10001:10001 +beeline -u "jdbc:hive2://localhost:10001/;transportMode=http;httpPath=cliservice" +``` + +### Connecting with Scale-to-Zero (minReplicas = 0) + +When HS2 is configured with `minReplicas: 0`, the deployment starts with zero pods. 
+Connections go through the **KEDA HTTP interceptor proxy** which automatically wakes +HS2 when a request arrives (first request takes ~30-60s while the pod starts). 
+ +``` +Traffic flow: +Client → KEDA HTTP Interceptor → (if 0 pods: scale up, wait) → HS2 Service → HS2 Pod +``` + +**Via port-forward (local development):** + +```bash +# Port-forward the KEDA HTTP interceptor proxy +kubectl port-forward -n keda svc/keda-add-ons-http-interceptor-proxy 8080:8080 + +# Connect — interceptor auto-wakes HS2 (first request may take 30-60s) +beeline -u "jdbc:hive2://localhost:8080/;transportMode=http;httpPath=cliservice" +``` + +**Via Ingress (production):** + +Create an Ingress that routes your domain to the KEDA interceptor. The key is the +`upstream-vhost` annotation which rewrites the Host header to the internal service +name so the interceptor can match it — no extra operator configuration needed: + +```bash +cat <<'EOF' | kubectl apply -f - +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: hive-interceptor + namespace: keda + annotations: + # Rewrite Host header to internal service name so KEDA interceptor can route it + nginx.ingress.kubernetes.io/upstream-vhost: "hive-hiveserver2.default.svc.cluster.local" +spec: + ingressClassName: nginx + rules: + - host: hive.example.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: keda-add-ons-http-interceptor-proxy + port: + number: 8080 +EOF +``` + +Connect via beeline using the Ingress: + +```bash +beeline -u "jdbc:hive2://hive.example.com:80/;transportMode=http;httpPath=cliservice" +``` + +**Manual wake (fallback without HTTP Add-on):** + +```bash +kubectl scale deployment/hive-hiveserver2 --replicas=1 +kubectl wait --for=condition=ready pod -l app.kubernetes.io/component=hiveserver2 --timeout=120s +kubectl exec -it deployment/hive-hiveserver2 -- beeline -u "jdbc:hive2://hive-hiveserver2:10001/;transportMode=http;httpPath=cliservice" ``` +> **Note:** The operator sets `hive.server2.transport.mode=http`, +> `hive.server2.thrift.http.port=10001`, and +> `hive.server2.thrift.http.path=cliservice` by default. 
The binary Thrift +> port (10000) is still exposed for backward compatibility but HTTP mode +> is the primary transport. To override, use `configOverrides` in the +> HiveServer2 spec. + --- ## Helm Values Reference @@ -620,6 +1050,18 @@ beeline -u "jdbc:hive2://localhost:10000/" | `cluster.tezAm.extraVolumes` | `[]` | Additional volumes for TezAM pods | | `cluster.tezAm.extraVolumeMounts` | `[]` | Additional volume mounts for TezAM containers | +### Autoscaling (per component) + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.<component>.autoscaling.enabled` | `false` | Enable KEDA-based autoscaling for this component | +| `cluster.<component>.autoscaling.minReplicas` | `0` (HS2/LLAP/TezAM), `1` (HMS) | Floor replica count during scale-down | +| `cluster.<component>.autoscaling.scaleUpThreshold` | `60-80` | Metric threshold triggering scale-up (CPU% for HS2/HMS/TezAM with resources; pending tasks per AM for TezAM without resources; queue depth for LLAP) | +| `cluster.<component>.autoscaling.scaleDownThreshold` | `10-30` | Metric percentage threshold triggering scale-down | +| `cluster.<component>.autoscaling.cooldownSeconds` | `300-900` | Minimum seconds between scaling events | +| `cluster.<component>.autoscaling.gracePeriodSeconds` | `60-600` | Max time (seconds) to wait for graceful drain | +| `cluster.hiveServer2.autoscaling.scaleToZeroHosts` | `[]` | Hostnames for KEDA HTTP interceptor routing (Ingress domain) | + --- ## Upgrade and Uninstall @@ -659,12 +1101,35 @@ helm install hive ./helm/hive-operator -f my-values.yaml ### Remove Everything (including dependencies) ```bash +# 1. Uninstall Hive operator (removes ScaledObjects, pods, services via owner references) helm uninstall hive -kubectl delete crd hiveclusters.hive.apache.org +kubectl delete crd hiveclusters.hive.apache.org --ignore-not-found + +# 2. Remove the KEDA interceptor Ingress (if configured for scale-to-zero wake) +kubectl delete ingress hive-interceptor -n keda --ignore-not-found + +# 3. 
Uninstall autoscaling infrastructure (KEDA, HTTP Add-on, Prometheus) +helm uninstall http-add-on -n keda --ignore-not-found +helm uninstall keda -n keda --ignore-not-found +helm uninstall prometheus -n monitoring --ignore-not-found + +# 4. Remove KEDA CRDs (not removed by helm uninstall) +kubectl delete crd --ignore-not-found \ + scaledobjects.keda.sh \ + scaledjobs.keda.sh \ + triggerauthentications.keda.sh \ + clustertriggerauthentications.keda.sh \ + httpscaledobjects.http.keda.sh + +# 5. Uninstall storage and infrastructure dependencies helm uninstall ozone postgres zookeeper --ignore-not-found + +# 6. Clean up PVCs, secrets, and namespaces kubectl delete pvc data-zookeeper-0 --ignore-not-found kubectl delete pvc data-postgres-postgresql-0 --ignore-not-found kubectl delete secret hive-db-secret --ignore-not-found +kubectl delete namespace keda --ignore-not-found +kubectl delete namespace monitoring --ignore-not-found ``` --- diff --git a/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml b/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml index 99768633a128..6fc5916b84dd 100644 --- a/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml +++ b/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml @@ -44,6 +44,44 @@ spec: hiveServer2: description: HiveServer2 component configuration properties: + autoscaling: + description: Autoscaling configuration (requires KEDA installed + in the cluster) + properties: + cooldownSeconds: + default: 600 + description: Cooldown period in seconds after a scaling event + before another can occur + type: integer + enabled: + default: false + description: Whether autoscaling is enabled for this component + type: boolean + gracePeriodSeconds: + default: 300 + description: Maximum time in seconds to wait for graceful + drain during scale-down before the pod is forcibly terminated + type: integer + 
minReplicas: + default: 0 + description: Minimum number of replicas (floor for scale-down). + Set to 0 for scale-to-zero (HS2 requires KEDA HTTP Add-on + for wake-from-zero) + type: integer + scaleDownThreshold: + default: 20 + description: "Percentage threshold that triggers scale-down\ + \ (all conditions must be met: metric below threshold AND\ + \ CPU below threshold)" + type: integer + scaleUpThreshold: + default: 80 + description: "Threshold that triggers scale-up (component-specific:\ + \ sessions for HS2, queue depth for LLAP, CPU% for TezAM\ + \ with resources, pending tasks per AM for TezAM without\ + \ resources)" + type: integer + type: object configOverrides: additionalProperties: type: string @@ -152,6 +190,44 @@ spec: llap: description: LLAP daemon configuration. Enabled by default. properties: + autoscaling: + description: Autoscaling configuration (requires KEDA installed + in the cluster) + properties: + cooldownSeconds: + default: 600 + description: Cooldown period in seconds after a scaling event + before another can occur + type: integer + enabled: + default: false + description: Whether autoscaling is enabled for this component + type: boolean + gracePeriodSeconds: + default: 300 + description: Maximum time in seconds to wait for graceful + drain during scale-down before the pod is forcibly terminated + type: integer + minReplicas: + default: 0 + description: Minimum number of replicas (floor for scale-down). 
+ Set to 0 for scale-to-zero (HS2 requires KEDA HTTP Add-on + for wake-from-zero) + type: integer + scaleDownThreshold: + default: 20 + description: "Percentage threshold that triggers scale-down\ + \ (all conditions must be met: metric below threshold AND\ + \ CPU below threshold)" + type: integer + scaleUpThreshold: + default: 80 + description: "Threshold that triggers scale-up (component-specific:\ + \ sessions for HS2, queue depth for LLAP, CPU% for TezAM\ + \ with resources, pending tasks per AM for TezAM without\ + \ resources)" + type: integer + type: object configOverrides: additionalProperties: type: string @@ -235,6 +311,44 @@ spec: metastore: description: Metastore component configuration properties: + autoscaling: + description: Autoscaling configuration (requires KEDA installed + in the cluster) + properties: + cooldownSeconds: + default: 600 + description: Cooldown period in seconds after a scaling event + before another can occur + type: integer + enabled: + default: false + description: Whether autoscaling is enabled for this component + type: boolean + gracePeriodSeconds: + default: 300 + description: Maximum time in seconds to wait for graceful + drain during scale-down before the pod is forcibly terminated + type: integer + minReplicas: + default: 0 + description: Minimum number of replicas (floor for scale-down). 
+ Set to 0 for scale-to-zero (HS2 requires KEDA HTTP Add-on + for wake-from-zero) + type: integer + scaleDownThreshold: + default: 20 + description: "Percentage threshold that triggers scale-down\ + \ (all conditions must be met: metric below threshold AND\ + \ CPU below threshold)" + type: integer + scaleUpThreshold: + default: 80 + description: "Threshold that triggers scale-up (component-specific:\ + \ sessions for HS2, queue depth for LLAP, CPU% for TezAM\ + \ with resources, pending tasks per AM for TezAM without\ + \ resources)" + type: integer + type: object configOverrides: additionalProperties: type: string @@ -371,6 +485,44 @@ spec: tezAm: description: Tez Application Master configuration. Enabled by default. properties: + autoscaling: + description: Autoscaling configuration (requires KEDA installed + in the cluster) + properties: + cooldownSeconds: + default: 600 + description: Cooldown period in seconds after a scaling event + before another can occur + type: integer + enabled: + default: false + description: Whether autoscaling is enabled for this component + type: boolean + gracePeriodSeconds: + default: 300 + description: Maximum time in seconds to wait for graceful + drain during scale-down before the pod is forcibly terminated + type: integer + minReplicas: + default: 0 + description: Minimum number of replicas (floor for scale-down). 
+ Set to 0 for scale-to-zero (HS2 requires KEDA HTTP Add-on + for wake-from-zero) + type: integer + scaleDownThreshold: + default: 20 + description: "Percentage threshold that triggers scale-down\ + \ (all conditions must be met: metric below threshold AND\ + \ CPU below threshold)" + type: integer + scaleUpThreshold: + default: 80 + description: "Threshold that triggers scale-up (component-specific:\ + \ sessions for HS2, queue depth for LLAP, CPU% for TezAM\ + \ with resources, pending tasks per AM for TezAM without\ + \ resources)" + type: integer + type: object configOverrides: additionalProperties: type: string diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml index d27e1fea8c6f..791c60e0d813 100644 --- a/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml +++ b/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml @@ -50,3 +50,15 @@ rules: - apiGroups: [""] resources: ["pods"] verbs: ["get", "list", "watch"] + # PodDisruptionBudgets for graceful autoscaling + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # KEDA ScaledObjects for autoscaling + - apiGroups: ["keda.sh"] + resources: ["scaledobjects", "triggerauthentications"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # KEDA HTTP Add-on for scale-to-zero (wake-from-zero on HTTP request) + - apiGroups: ["http.keda.sh"] + resources: ["httpscaledobjects"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml index 091ecefb3cb0..c490b1f5d333 100644 --- a/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml +++ 
b/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml @@ -67,6 +67,15 @@ spec: extraVolumeMounts: {{- toYaml .Values.cluster.metastore.extraVolumeMounts | nindent 6 }} {{- end }} + {{- if and .Values.cluster.metastore.autoscaling .Values.cluster.metastore.autoscaling.enabled }} + autoscaling: + enabled: true + minReplicas: {{ .Values.cluster.metastore.autoscaling.minReplicas }} + scaleUpThreshold: {{ .Values.cluster.metastore.autoscaling.scaleUpThreshold }} + scaleDownThreshold: {{ .Values.cluster.metastore.autoscaling.scaleDownThreshold }} + cooldownSeconds: {{ .Values.cluster.metastore.autoscaling.cooldownSeconds }} + gracePeriodSeconds: {{ .Values.cluster.metastore.autoscaling.gracePeriodSeconds }} + {{- end }} {{- else }} {{- if .Values.cluster.metastore.externalUri }} externalUri: {{ .Values.cluster.metastore.externalUri | quote }} @@ -96,6 +105,15 @@ spec: extraVolumeMounts: {{- toYaml .Values.cluster.hiveServer2.extraVolumeMounts | nindent 6 }} {{- end }} + {{- if and .Values.cluster.hiveServer2.autoscaling .Values.cluster.hiveServer2.autoscaling.enabled }} + autoscaling: + enabled: true + minReplicas: {{ .Values.cluster.hiveServer2.autoscaling.minReplicas }} + scaleUpThreshold: {{ .Values.cluster.hiveServer2.autoscaling.scaleUpThreshold }} + scaleDownThreshold: {{ .Values.cluster.hiveServer2.autoscaling.scaleDownThreshold }} + cooldownSeconds: {{ .Values.cluster.hiveServer2.autoscaling.cooldownSeconds }} + gracePeriodSeconds: {{ .Values.cluster.hiveServer2.autoscaling.gracePeriodSeconds }} + {{- end }} llap: enabled: {{ .Values.cluster.llap.enabled }} @@ -120,6 +138,15 @@ spec: extraVolumeMounts: {{- toYaml .Values.cluster.llap.extraVolumeMounts | nindent 6 }} {{- end }} + {{- if and .Values.cluster.llap.autoscaling .Values.cluster.llap.autoscaling.enabled }} + autoscaling: + enabled: true + minReplicas: {{ .Values.cluster.llap.autoscaling.minReplicas }} + scaleUpThreshold: {{ .Values.cluster.llap.autoscaling.scaleUpThreshold }} + 
scaleDownThreshold: {{ .Values.cluster.llap.autoscaling.scaleDownThreshold }} + cooldownSeconds: {{ .Values.cluster.llap.autoscaling.cooldownSeconds }} + gracePeriodSeconds: {{ .Values.cluster.llap.autoscaling.gracePeriodSeconds }} + {{- end }} {{- end }} tezAm: @@ -146,6 +173,15 @@ spec: extraVolumeMounts: {{- toYaml .Values.cluster.tezAm.extraVolumeMounts | nindent 6 }} {{- end }} + {{- if and .Values.cluster.tezAm.autoscaling .Values.cluster.tezAm.autoscaling.enabled }} + autoscaling: + enabled: true + minReplicas: {{ .Values.cluster.tezAm.autoscaling.minReplicas }} + scaleUpThreshold: {{ .Values.cluster.tezAm.autoscaling.scaleUpThreshold }} + scaleDownThreshold: {{ .Values.cluster.tezAm.autoscaling.scaleDownThreshold }} + cooldownSeconds: {{ .Values.cluster.tezAm.autoscaling.cooldownSeconds }} + gracePeriodSeconds: {{ .Values.cluster.tezAm.autoscaling.gracePeriodSeconds }} + {{- end }} {{- end }} zookeeper: diff --git a/packaging/src/kubernetes/helm/hive-operator/values.yaml b/packaging/src/kubernetes/helm/hive-operator/values.yaml index b7d75930c5b2..a0823f90de0d 100644 --- a/packaging/src/kubernetes/helm/hive-operator/values.yaml +++ b/packaging/src/kubernetes/helm/hive-operator/values.yaml @@ -112,6 +112,15 @@ cluster: configOverrides: {} extraVolumes: [] extraVolumeMounts: [] + # Autoscaling (requires KEDA + Prometheus in the cluster) + # When enabled, 'replicas' above acts as the max replica ceiling + autoscaling: + enabled: false + minReplicas: 1 + scaleUpThreshold: 75 + scaleDownThreshold: 30 + cooldownSeconds: 300 + gracePeriodSeconds: 60 # Set to use an external Metastore instead of deploying one: # enabled: false # externalUri: "thrift://external-metastore:9083" @@ -127,6 +136,16 @@ cluster: externalJars: [] extraVolumes: [] extraVolumeMounts: [] + # Autoscaling (requires KEDA + Prometheus + KEDA HTTP Add-on in the cluster) + # minReplicas: 0 enables scale-to-zero — beeline HTTP connects wake HS2 via KEDA HTTP interceptor + # When enabled, 'replicas' 
above acts as the max replica ceiling + autoscaling: + enabled: false + minReplicas: 0 + scaleUpThreshold: 80 + scaleDownThreshold: 20 + cooldownSeconds: 600 + gracePeriodSeconds: 300 # --------------------------------------------------------------------------- # LLAP — enabled by default for full-HA @@ -141,6 +160,16 @@ cluster: configOverrides: {} extraVolumes: [] extraVolumeMounts: [] + # Autoscaling (requires KEDA + Prometheus in the cluster) + # minReplicas: 0 enables scale-to-zero — scales up immediately when queries need LLAP + # When enabled, 'replicas' above acts as the max replica ceiling + autoscaling: + enabled: false + minReplicas: 0 + scaleUpThreshold: 1 + scaleDownThreshold: 0 + cooldownSeconds: 900 + gracePeriodSeconds: 600 # --------------------------------------------------------------------------- # TEZ AM — enabled by default for full-HA @@ -154,3 +183,15 @@ cluster: configOverrides: {} extraVolumes: [] extraVolumeMounts: [] + # Autoscaling (requires KEDA + Prometheus in the cluster) + # minReplicas: 0 enables scale-to-zero — wakes when HS2 receives queries + # When enabled, 'replicas' above acts as the max replica ceiling + # scaleUpThreshold: with CPU resources set → CPU% (e.g., 60 = 60% utilization); + # without CPU resources → pending tasks per AM (e.g., 5 = scale when 5+ tasks waiting) + autoscaling: + enabled: false + minReplicas: 0 + scaleUpThreshold: 5 + scaleDownThreshold: 10 + cooldownSeconds: 600 + gracePeriodSeconds: 120 diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java index cc2eb0de6de0..9b1cb75d6553 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java @@ -45,11 +45,13 @@ import 
io.javaoperatorsdk.operator.processing.dependent.Matcher; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.CRUDKubernetesDependentResource; import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; import org.apache.hive.kubernetes.operator.model.spec.DatabaseConfig; import org.apache.hive.kubernetes.operator.model.spec.ResourceRequirementsSpec; import org.apache.hive.kubernetes.operator.model.spec.SecretKeyRef; import org.apache.hive.kubernetes.operator.model.spec.ProbeSpec; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -422,4 +424,141 @@ protected static Probe buildTcpProbe(int port, ProbeSpec spec, int defaultInitia return builder.build(); } + /** Path where the JMX Exporter agent JAR is stored inside the pod. */ + protected static final String JMX_EXPORTER_DIR = "/opt/jmx-exporter"; + protected static final String JMX_EXPORTER_JAR = JMX_EXPORTER_DIR + "/jmx_prometheus_javaagent.jar"; + protected static final String JMX_EXPORTER_CONFIG = JMX_EXPORTER_DIR + "/config.yaml"; + + /** + * Adds the Prometheus JMX Exporter agent infrastructure to a pod spec when + * autoscaling is enabled. This includes: + *
    + *
  • An emptyDir volume for the JMX exporter JAR and config
  • + *
  • An init container that downloads the agent JAR and writes a config file
  • + *
  • A volume mount on the main container
  • + *
  • A container port for the metrics endpoint (9404)
  • + *
  • The javaagent JVM argument appended to SERVICE_OPTS (LLAP_DAEMON_OPTS for the llap component)
  • + *
+ * + * @param image the container image (used for the init container) + * @param component the Hive component name (for JMX bean pattern matching) + * @param initContainers list to add the download init container to + * @param volumeMounts list to add the jmx-exporter mount to (main container) + * @param volumes list to add the emptyDir volume to + * @param envVars list of env vars — SERVICE_OPTS will be updated with the javaagent flag + * @param ports list to add the metrics port to + */ + protected static void addJmxExporter( + String image, String component, + List initContainers, + List volumeMounts, + List volumes, + List envVars, + List ports) { + + // Volume for the JMX exporter JAR + config + volumes.add(new VolumeBuilder() + .withName("jmx-exporter") + .withNewEmptyDir().endEmptyDir().build()); + VolumeMount exporterMount = new VolumeMountBuilder() + .withName("jmx-exporter") + .withMountPath(JMX_EXPORTER_DIR).build(); + volumeMounts.add(exporterMount); + + // JMX exporter config: export all beans in a catch-all pattern + // The agent exposes metrics in Prometheus text format at /metrics + String jmxConfig = buildJmxExporterConfig(component); + + // Init container: download JAR + write config + String downloadCmd = String.format( + "wget -q --tries=3 --waitretry=5 -O %s '%s' && " + + "cat > %s << 'JMXEOF'\n%s\nJMXEOF", + JMX_EXPORTER_JAR, ConfigUtils.JMX_EXPORTER_JAR_URL, + JMX_EXPORTER_CONFIG, jmxConfig); + initContainers.add(new ContainerBuilder() + .withName("jmx-exporter-init") + .withImage(image) + .withCommand("/bin/bash", "-c", downloadCmd) + .withVolumeMounts(exporterMount) + .build()); + + // Expose the metrics port + ports.add(new io.fabric8.kubernetes.api.model.ContainerPortBuilder() + .withName("metrics") + .withContainerPort(ConfigUtils.PROMETHEUS_JMX_EXPORTER_PORT).build()); + + // Add javaagent flag to the appropriate JVM opts env var. + // LLAP uses LLAP_DAEMON_OPTS (its startup script ignores SERVICE_OPTS). 
+ String agentArg = String.format("-javaagent:%s=%d:%s", + JMX_EXPORTER_JAR, ConfigUtils.PROMETHEUS_JMX_EXPORTER_PORT, JMX_EXPORTER_CONFIG); + String optsEnvVar = "llap".equals(component) ? "LLAP_DAEMON_OPTS" : "SERVICE_OPTS"; + boolean found = false; + for (int i = 0; i < envVars.size(); i++) { + if (optsEnvVar.equals(envVars.get(i).getName())) { + String existing = envVars.get(i).getValue(); + envVars.set(i, new EnvVar(optsEnvVar, + existing + " " + agentArg, null)); + found = true; + break; + } + } + if (!found) { + envVars.add(new EnvVar(optsEnvVar, agentArg, null)); + } + } + + /** + * Builds the JMX Exporter YAML config for a Hive component. + * Uses broad patterns to export all Hive/Hadoop metrics relevant to autoscaling. + */ + private static String buildJmxExporterConfig(String component) { + StringBuilder sb = new StringBuilder(); + sb.append("lowercaseOutputName: true\n"); + sb.append("lowercaseOutputLabelNames: true\n"); + sb.append("rules:\n"); + + switch (component) { + case "hiveserver2": + // HS2 session and operation metrics + sb.append("- pattern: 'metrics<>Value'\n"); + sb.append(" name: hs2_$1\n"); + sb.append(" type: GAUGE\n"); + sb.append("- pattern: 'metrics<>Count'\n"); + sb.append(" name: hs2_active_calls_$1\n"); + sb.append(" type: GAUGE\n"); + // Tez session pool metrics (pending tasks, backlog ratio, running tasks) + sb.append("- pattern: 'metrics<>Value'\n"); + sb.append(" name: tez_session_$1\n"); + sb.append(" type: GAUGE\n"); + break; + case "metastore": + // HMS API call metrics + sb.append("- pattern: 'metrics<>Count'\n"); + sb.append(" name: api_$1_total\n"); + sb.append(" type: COUNTER\n"); + sb.append("- pattern: 'metrics<>Value'\n"); + sb.append(" name: hive_metastore_open_connections\n"); + sb.append(" type: GAUGE\n"); + break; + case "llap": + // LLAP uses its own MetricsSystem (not DefaultMetricsSystem). 
+ // Default JMX exporter pattern (.*) exports Hadoop Metrics2 MBeans as: + // hadoop_llapdaemon_{name=""} + // e.g., hadoop_llapdaemon_executornumqueuedrequests{name="LlapDaemonExecutorMetrics-..."} + // No custom rules needed — the default naming is usable directly. + sb.append("- pattern: '.*'\n"); + break; + case "tezam": + // TezAM DAG execution metrics + sb.append("- pattern: 'Hadoop<>(.+)'\n"); + sb.append(" name: tez_am_$1\n"); + sb.append(" type: GAUGE\n"); + break; + default: + sb.append("- pattern: '.*'\n"); + break; + } + return sb.toString(); + } + } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveGenericDependentResource.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveGenericDependentResource.java new file mode 100644 index 000000000000..feff8775a6f4 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveGenericDependentResource.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.Optional; +import java.util.Set; + +import io.fabric8.kubernetes.api.model.GenericKubernetesResource; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.reconciler.dependent.GarbageCollected; +import io.javaoperatorsdk.operator.processing.GroupVersionKind; +import io.javaoperatorsdk.operator.processing.dependent.Creator; +import io.javaoperatorsdk.operator.processing.dependent.Updater; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.GenericKubernetesDependentResource; +import org.apache.hive.kubernetes.operator.model.HiveCluster; + +/** + * Base class for dependent resources that manage custom resources via + * {@link GenericKubernetesResource} (e.g. KEDA ScaledObject, HTTPScaledObject). + *

+ * Extends {@link GenericKubernetesDependentResource} which properly configures + * the informer with the specified GroupVersionKind, avoiding the fabric8 + * "resources cannot be called with a generic type" error. + *

+ * Also overrides {@link #getSecondaryResource} to use the dependent's own + * event source (same pattern as {@link HiveDependentResource}) so multiple + * GenericKubernetesResource dependents don't collide in the type-based lookup. + */ +public abstract class HiveGenericDependentResource + extends GenericKubernetesDependentResource + implements Creator, + Updater, + GarbageCollected { + + protected HiveGenericDependentResource(GroupVersionKind gvk) { + super(gvk); + } + + /** + * Returns the expected Kubernetes resource name for this dependent given the primary. + * Used to discriminate between multiple secondary resources of the same GVK + * (e.g. multiple ScaledObjects owned by the same HiveCluster). + */ + protected abstract String getResourceName(HiveCluster hiveCluster); + + @Override + public Optional getSecondaryResource( + HiveCluster primary, Context context) { + String expectedName = getResourceName(primary); + Set secondaries = eventSource() + .map(es -> es.getSecondaryResources(primary)) + .orElse(Set.of()); + return secondaries.stream() + .filter(r -> expectedName.equals(r.getMetadata().getName())) + .findFirst(); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java index ccb3048dea98..c61383ac0f5a 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java @@ -26,6 +26,8 @@ import io.fabric8.kubernetes.api.model.ContainerPort; import io.fabric8.kubernetes.api.model.ContainerPortBuilder; import io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.Lifecycle; +import io.fabric8.kubernetes.api.model.LifecycleBuilder; import 
io.fabric8.kubernetes.api.model.Probe; import io.fabric8.kubernetes.api.model.apps.Deployment; import io.fabric8.kubernetes.api.model.apps.DeploymentBuilder; @@ -34,6 +36,7 @@ import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; import org.apache.hive.kubernetes.operator.model.spec.HiveServer2Spec; import org.apache.hive.kubernetes.operator.util.ConfigUtils; import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; @@ -125,21 +128,28 @@ protected Deployment desired(HiveCluster hiveCluster, hs2.configOverrides(), ConfigUtils.HIVE_SERVER2_THRIFT_PORT_KEY, null, ConfigUtils.HIVE_SERVER2_THRIFT_PORT_DEFAULT); + int hs2HttpPort = ConfigUtils.getInt( + hs2.configOverrides(), + ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_KEY, + null, ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_DEFAULT); int hs2WebUiPort = ConfigUtils.getInt( hs2.configOverrides(), ConfigUtils.HIVE_SERVER2_WEBUI_PORT_KEY, null, ConfigUtils.HIVE_SERVER2_WEBUI_PORT_DEFAULT); - List ports = List.of( - new ContainerPortBuilder() - .withName("thrift") - .withContainerPort(hs2ThriftPort).build(), - new ContainerPortBuilder() - .withName("webui") - .withContainerPort(hs2WebUiPort).build() - ); + List ports = new ArrayList<>(); + ports.add(new ContainerPortBuilder() + .withName("thrift") + .withContainerPort(hs2ThriftPort).build()); + ports.add(new ContainerPortBuilder() + .withName("http") + .withContainerPort(hs2HttpPort).build()); + ports.add(new ContainerPortBuilder() + .withName("webui") + .withContainerPort(hs2WebUiPort).build()); - Probe readinessProbe = buildTcpProbe(hs2ThriftPort, hs2.readinessProbe(), 15, 10, 3); - Probe livenessProbe = buildTcpProbe(hs2ThriftPort, hs2.livenessProbe(), 120, 30, 10); + // Probes target the HTTP transport port (default mode) + Probe 
readinessProbe = buildTcpProbe(hs2HttpPort, hs2.readinessProbe(), 15, 10, 3); + Probe livenessProbe = buildTcpProbe(hs2HttpPort, hs2.livenessProbe(), 120, 30, 10); boolean tezAmEnabled = spec.tezAm().isEnabled(); @@ -185,6 +195,13 @@ protected Deployment desired(HiveCluster hiveCluster, replaceConfMountWithSubPaths(volumeMounts, "hive-config", "hive-site.xml", "tez-site.xml", "core-site.xml"); + // Add Prometheus JMX Exporter when autoscaling is enabled + AutoscalingSpec autoscaling = hs2.autoscaling(); + if (autoscaling.isEnabled()) { + addJmxExporter(spec.image(), COMPONENT, + initContainers, volumeMounts, volumes, envVars, ports); + } + // Pre-compute config hash for the pod template annotation. // This ensures the Deployment is created with the correct hash // from the start (single ReplicaSet) and triggers rolling @@ -194,6 +211,19 @@ protected Deployment desired(HiveCluster hiveCluster, HadoopXmlBuilder.buildXml(HiveConfigBuilder.getTezSite(spec)), HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); + // When autoscaling is enabled and the Deployment already exists, preserve the current + // replica count (managed by KEDA/HPA). On initial creation: + // - minReplicas == 0 (scale-to-zero): start at 0, KEDA HTTPScaledObject handles wake-up + // - minReplicas > 0: start at configured replicas + boolean autoscalingEnabled = hs2.autoscaling() != null && hs2.autoscaling().isEnabled(); + Integer replicas = hs2.replicas(); + if (autoscalingEnabled) { + int initialReplicas = hs2.autoscaling().minReplicas() == 0 ? 
0 : hs2.replicas(); + replicas = getSecondaryResource(hiveCluster, context) + .map(d -> d.getSpec().getReplicas()) + .orElse(initialReplicas); + } + Deployment deployment = new DeploymentBuilder() .withNewMetadata() .withName(resourceName(hiveCluster)) @@ -201,7 +231,7 @@ protected Deployment desired(HiveCluster hiveCluster, .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) .endMetadata() .withNewSpec() - .withReplicas(hs2.replicas()) + .withReplicas(replicas) .withNewSelector() .withMatchLabels(selectorLabels) .endSelector() @@ -233,6 +263,58 @@ protected Deployment desired(HiveCluster hiveCluster, applySpreadAffinityIfAbsent( deployment.getSpec().getTemplate().getSpec(), selectorLabels); + // Graceful scale-down: deregister from ZK, then poll JMX Exporter (port 9404) for sessions. + // Uses flat Prometheus text format — same metric KEDA reads — not brittle JSON parsing. + if (autoscaling.isEnabled()) { + String preStopScript = String.join("\n", + "#!/bin/bash", + "echo '[preStop] Deregistering HiveServer2 from ZooKeeper...'", + "hive --service hiveserver2 --deregister || echo '[preStop] WARNING: ZK deregister failed'", + "echo '[preStop] Waiting for open sessions to drain (polling localhost:9404/metrics)...'", + "RETRIES=0", + "while true; do", + " RESPONSE=$(curl -sf http://localhost:9404/metrics)", + " if [ $? -ne 0 ]; then", + " RETRIES=$((RETRIES+1))", + " echo \"[preStop] ERROR: JMX Exporter unreachable on port 9404 (attempt $RETRIES)\"", + " if [ $RETRIES -ge 6 ]; then", + " echo '[preStop] JMX Exporter not responding after 30s. Proceeding with shutdown.'", + " break", + " fi", + " sleep 5; continue", + " fi", + " SESSIONS=$(echo \"$RESPONSE\" | grep '^hs2_open_sessions ' | awk '{print $2}')", + " if [ -z \"$SESSIONS\" ]; then", + " echo '[preStop] WARNING: hs2_open_sessions metric not found. JMX Exporter may not be configured.'", + " break", + " fi", + " if [ \"${SESSIONS%.*}\" -le 0 ] 2>/dev/null; then", + " echo '[preStop] All sessions drained. 
Shutting down.'", + " break", + " fi", + " echo \"[preStop] hs2_open_sessions=$SESSIONS — waiting...\"", + " RETRIES=0", + " sleep 5", + "done"); + Lifecycle lifecycle = new LifecycleBuilder() + .withNewPreStop() + .withNewExec() + .withCommand("/bin/bash", "-c", preStopScript) + .endExec() + .endPreStop() + .build(); + deployment.getSpec().getTemplate().getSpec().getContainers().get(0).setLifecycle(lifecycle); + deployment.getSpec().getTemplate().getSpec() + .setTerminationGracePeriodSeconds((long) autoscaling.gracePeriodSeconds()); + // Prometheus scrape annotations for JMX Exporter metrics endpoint + deployment.getSpec().getTemplate().getMetadata().getAnnotations() + .put("prometheus.io/scrape", "true"); + deployment.getSpec().getTemplate().getMetadata().getAnnotations() + .put("prometheus.io/port", String.valueOf(ConfigUtils.PROMETHEUS_JMX_EXPORTER_PORT)); + deployment.getSpec().getTemplate().getMetadata().getAnnotations() + .put("prometheus.io/path", "/metrics"); + } + if (spec.volumes() != null) { deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2HttpScaledObjectDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2HttpScaledObjectDependent.java new file mode 100644 index 000000000000..74794ad8e16a --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2HttpScaledObjectDependent.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import io.fabric8.kubernetes.api.model.GenericKubernetesResource; +import io.fabric8.kubernetes.api.model.GenericKubernetesResourceBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.GroupVersionKind; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Manages a KEDA HTTPScaledObject for HiveServer2 scale-to-zero. + *

+ * Requires the KEDA HTTP Add-on to be installed in the cluster. + * The HTTP Add-on creates an interceptor proxy that: + *

    + *
  • Sits in front of the HS2 Service
  • + *
  • Queues incoming beeline/HTTP requests when HS2 has 0 pods
  • + *
  • Triggers KEDA to scale HS2 from 0 to 1
  • + *
  • Forwards the queued request once a pod is ready
  • + *
+ *

+ * This dependent is activated ONLY when minReplicas == 0 (scale-to-zero mode). + * When minReplicas > 0, the regular ScaledObject (Prometheus-based) is used instead. + */ +public class HiveServer2HttpScaledObjectDependent extends HiveGenericDependentResource { + + public HiveServer2HttpScaledObjectDependent() { + super(new GroupVersionKind("http.keda.sh", "v1alpha1", "HTTPScaledObject")); + } + + @Override + protected GenericKubernetesResource desired(HiveCluster hiveCluster, + Context context) { + AutoscalingSpec autoscaling = hiveCluster.getSpec().hiveServer2().autoscaling(); + int maxReplicas = hiveCluster.getSpec().hiveServer2().replicas(); + String clusterName = hiveCluster.getMetadata().getName(); + String namespace = hiveCluster.getMetadata().getNamespace(); + String deploymentName = clusterName + "-hiveserver2"; + String serviceName = clusterName + "-hiveserver2"; + + int httpPort = ConfigUtils.getInt( + hiveCluster.getSpec().hiveServer2().configOverrides(), + ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_KEY, + null, ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_DEFAULT); + + Map spec = new HashMap<>(); + + // Hosts the interceptor matches for routing. + // Uses internal service DNS names (Ingress rewrites Host header to match these) + // plus localhost for kubectl port-forward scenarios. + spec.put("hosts", List.of( + serviceName + "." 
+ namespace + ".svc.cluster.local", + serviceName, + "localhost" + )); + spec.put("pathPrefixes", List.of("/")); + + // Target deployment and service + spec.put("scaleTargetRef", Map.of( + "name", deploymentName, + "kind", "Deployment", + "apiVersion", "apps/v1", + "service", serviceName, + "port", httpPort + )); + + // Replica bounds + spec.put("replicas", Map.of( + "min", 0, + "max", maxReplicas + )); + + // Scaling metric: scale up when there are pending requests + spec.put("scalingMetric", Map.of( + "requestRate", Map.of( + "granularity", "1s", + "targetValue", autoscaling.scaleUpThreshold(), + "window", "1m" + ) + )); + + // Cooldown before scaling back to 0 + spec.put("scaledownPeriod", autoscaling.cooldownSeconds()); + + return new GenericKubernetesResourceBuilder() + .withApiVersion("http.keda.sh/v1alpha1") + .withKind("HTTPScaledObject") + .withNewMetadata() + .withName(resourceName(hiveCluster)) + .withNamespace(namespace) + .withLabels(Labels.forComponent(hiveCluster, "hiveserver2")) + .endMetadata() + .withAdditionalProperties(Map.of("spec", spec)) + .build(); + } + + @Override + protected String getResourceName(HiveCluster hiveCluster) { + return resourceName(hiveCluster); + } + + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-hiveserver2-httpso"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2PdbDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2PdbDependent.java new file mode 100644 index 000000000000..054881f9644d --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2PdbDependent.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import io.fabric8.kubernetes.api.model.IntOrString; +import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudget; +import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudgetBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.CRUDKubernetesDependentResource; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * PodDisruptionBudget for HiveServer2. + * Ensures at least one HS2 pod remains available during eviction-based voluntary + * disruptions such as node drains; it does not gate scale-down or rolling updates. 
+ */ +public class HiveServer2PdbDependent + extends CRUDKubernetesDependentResource { + + public HiveServer2PdbDependent() { + super(PodDisruptionBudget.class); + } + + @Override + protected PodDisruptionBudget desired(HiveCluster hiveCluster, + Context context) { + return new PodDisruptionBudgetBuilder() + .withNewMetadata() + .withName(resourceName(hiveCluster)) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, "hiveserver2")) + .endMetadata() + .withNewSpec() + .withMinAvailable(new IntOrString(1)) + .withNewSelector() + .withMatchLabels(Labels.selectorForComponent(hiveCluster, "hiveserver2")) + .endSelector() + .endSpec() + .build(); + } + + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-hiveserver2-pdb"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ScaledObjectDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ScaledObjectDependent.java new file mode 100644 index 000000000000..196d97ce8b8f --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ScaledObjectDependent.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import io.fabric8.kubernetes.api.model.GenericKubernetesResource; +import io.fabric8.kubernetes.api.model.GenericKubernetesResourceBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.GroupVersionKind; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Manages a KEDA ScaledObject for HiveServer2 autoscaling. + *

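+ * Scale-up triggers (OR): + * - avg(hs2_active_sessions) across the HiveServer2 pods > scaleUpThreshold (Prometheus trigger; 60s scale-up stabilization window) + * - Pod CPU utilization > 75% (trigger added only when the HiveServer2 resources spec is set) + *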

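+ * Scale-down (AND: every trigger must be below its target): + * - at most one pod is removed per 60s, after a stabilization window of cooldownSeconds + * - the CPU trigger's activationValue is scaleDownThreshold, so idle JVM CPU below it does not keep the ScaledObject active + *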

+ * Cooldown: configurable (default 600s / 10 minutes) + */ +public class HiveServer2ScaledObjectDependent extends HiveGenericDependentResource { + + /** Largest stabilizationWindowSeconds Kubernetes accepts in an HPA behavior (one hour). */ + private static final int MAX_STABILIZATION_WINDOW_SECONDS = 3600; + + /** Registers this dependent for the KEDA keda.sh/v1alpha1 ScaledObject kind. */ + public HiveServer2ScaledObjectDependent() { + super(new GroupVersionKind("keda.sh", "v1alpha1", "ScaledObject")); + } + + /** + * Builds the desired ScaledObject targeting the HiveServer2 Deployment + * (cluster name + "-hiveserver2"). + * + * @param hiveCluster the primary resource; its hiveServer2 spec supplies replicas, + * autoscaling settings and resources + * @param context the reconciliation context (not read here) + * @return a generic keda.sh/v1alpha1 ScaledObject whose "spec" holds the scaling rules + */ + @Override + protected GenericKubernetesResource desired(HiveCluster hiveCluster, + Context<HiveCluster> context) { + AutoscalingSpec autoscaling = hiveCluster.getSpec().hiveServer2().autoscaling(); + int maxReplicas = hiveCluster.getSpec().hiveServer2().replicas(); + String targetName = hiveCluster.getMetadata().getName() + "-hiveserver2"; + + Map<String, Object> spec = new HashMap<>(); + spec.put("scaleTargetRef", Map.of( + "apiVersion", "apps/v1", + "kind", "Deployment", + "name", targetName + )); + // KEDA requires idleReplicaCount < minReplicaCount. + // For scale-to-zero: min=1 (minimum when active), idle=0 (scale to zero when idle). + int minReplicaCount = Math.max(1, autoscaling.minReplicas()); + spec.put("minReplicaCount", minReplicaCount); + spec.put("maxReplicaCount", maxReplicas); + if (autoscaling.minReplicas() == 0) { + spec.put("idleReplicaCount", 0); + } + spec.put("cooldownPeriod", autoscaling.cooldownSeconds()); + spec.put("pollingInterval", 30); + + // Advanced scaling policy: scale down one pod at a time for graceful drain. + // Kubernetes rejects an HPA whose stabilizationWindowSeconds is outside 0..3600, so the + // configured cooldown is clamped into that range (cooldownPeriod above is not limited). + spec.put("advanced", Map.of( + "horizontalPodAutoscalerConfig", Map.of( + "behavior", Map.of( + "scaleDown", Map.of( + "stabilizationWindowSeconds", Math.max(0, Math.min(autoscaling.cooldownSeconds(), MAX_STABILIZATION_WINDOW_SECONDS)), + "policies", List.of(Map.of( + "type", "Pods", + "value", 1, + "periodSeconds", 60 + )) + ), + "scaleUp", Map.of( + "stabilizationWindowSeconds", 60, + "policies", List.of(Map.of( + "type", "Percent", + "value", 100, + "periodSeconds", 60 + )) + ) + ) + ) + )); + + // Triggers: Prometheus for hs2_active_sessions + CPU fallback (added only when the resources spec is set) + // "or vector(0)" ensures the query returns 0 (not empty) when HS2 has no pods.
+ List<Map<String, Object>> triggers = new ArrayList<>(); + triggers.add(Map.of( + "type", "prometheus", + "metadata", Map.of( + "serverAddress", "http://prometheus-server.monitoring.svc.cluster.local", + "metricName", "hs2_active_sessions", + "query", String.format( + "avg(hs2_active_sessions{namespace=\"%s\",pod=~\"%s-.*\"}) or vector(0)", + hiveCluster.getMetadata().getNamespace(), targetName), + "threshold", String.valueOf(autoscaling.scaleUpThreshold()), + "activationThreshold", "0" + ) + )); + if (hiveCluster.getSpec().hiveServer2().resources() != null) { + // activationValue prevents idle JVM CPU from keeping the ScaledObject active. + triggers.add(Map.of( + "type", "cpu", + "metricType", "Utilization", + "metadata", Map.of( + "value", "75", + "activationValue", String.valueOf(autoscaling.scaleDownThreshold()) + ) + )); + } + spec.put("triggers", triggers); + + return new GenericKubernetesResourceBuilder() + .withApiVersion("keda.sh/v1alpha1") + .withKind("ScaledObject") + .withNewMetadata() + .withName(resourceName(hiveCluster)) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, "hiveserver2")) + .endMetadata() + .withAdditionalProperties(Map.of("spec", spec)) + .build(); + } + + /** Returns the ScaledObject name; delegates to {@link #resourceName(HiveCluster)}. */ + @Override + protected String getResourceName(HiveCluster hiveCluster) { + return resourceName(hiveCluster); + } + + /** Returns the ScaledObject name: the cluster's metadata name plus "-hiveserver2-scaledobject". */ + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-hiveserver2-scaledobject"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java index a9707ac0dfa6..13b218986e67 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java +++
b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java @@ -48,6 +48,9 @@ protected Service desired(HiveCluster hiveCluster, int thriftPort = ConfigUtils.getInt(hs2.configOverrides(), ConfigUtils.HIVE_SERVER2_THRIFT_PORT_KEY, null, ConfigUtils.HIVE_SERVER2_THRIFT_PORT_DEFAULT); + int httpPort = ConfigUtils.getInt(hs2.configOverrides(), + ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_KEY, + null, ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_DEFAULT); int webUiPort = ConfigUtils.getInt(hs2.configOverrides(), ConfigUtils.HIVE_SERVER2_WEBUI_PORT_KEY, null, ConfigUtils.HIVE_SERVER2_WEBUI_PORT_DEFAULT); @@ -68,6 +71,11 @@ protected Service desired(HiveCluster hiveCluster, .withPort(thriftPort) .withTargetPort(new IntOrString(thriftPort)) .endPort() + .addNewPort() + .withName("http") + .withPort(httpPort) + .withTargetPort(new IntOrString(httpPort)) + .endPort() .addNewPort() .withName("webui") .withPort(webUiPort) diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapPdbDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapPdbDependent.java new file mode 100644 index 000000000000..1f077751aa61 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapPdbDependent.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import io.fabric8.kubernetes.api.model.IntOrString; +import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudget; +import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudgetBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.CRUDKubernetesDependentResource; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * PodDisruptionBudget for LLAP daemons. + * Ensures at least one LLAP daemon remains available during voluntary disruptions + * to prevent query failures and cache loss. 
+ */ +public class LlapPdbDependent + extends CRUDKubernetesDependentResource { + /** + * Registers PodDisruptionBudget as the resource type handled by this dependent. + */ + public LlapPdbDependent() { + super(PodDisruptionBudget.class); + } + /** + * Builds the desired PodDisruptionBudget: named by {@link #resourceName}, placed in the + * HiveCluster's namespace, labelled via Labels.forComponent(hiveCluster, "llap"), + * with minAvailable=1 and a selector from Labels.selectorForComponent(hiveCluster, "llap"). + * NOTE(review): with a single running daemon, minAvailable=1 leaves no disruption budget, so + * voluntary evictions (e.g. node drain) of that pod would be refused; confirm this is intended. + * + * @param hiveCluster the primary HiveCluster resource + * @param context the reconciliation context (not read here) + * @return the desired PodDisruptionBudget + */ + @Override + protected PodDisruptionBudget desired(HiveCluster hiveCluster, + Context context) { + return new PodDisruptionBudgetBuilder() + .withNewMetadata() + .withName(resourceName(hiveCluster)) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, "llap")) + .endMetadata() + .withNewSpec() + .withMinAvailable(new IntOrString(1)) + .withNewSelector() + .withMatchLabels(Labels.selectorForComponent(hiveCluster, "llap")) + .endSelector() + .endSpec() + .build(); + } + /** + * Returns the PodDisruptionBudget name: the HiveCluster's metadata name followed by "-llap-pdb". + */ + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-llap-pdb"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapScaledObjectDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapScaledObjectDependent.java new file mode 100644 index 000000000000..7f6886a594df --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapScaledObjectDependent.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import io.fabric8.kubernetes.api.model.GenericKubernetesResource; +import io.fabric8.kubernetes.api.model.GenericKubernetesResourceBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.GroupVersionKind; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Manages a KEDA ScaledObject for LLAP daemon autoscaling. + *

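+ * Scale-up triggers: + * - avg NumQueuedRequests per daemon > scaleUpThreshold (a non-empty queue means all executors are busy); + * polled every 5s with no scale-up stabilization window + * - any open HiveServer2 session activates the ScaledObject (wakes LLAP from zero) without driving it above the minimum + *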

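+ * Scale-down: + * - once the triggers fall below target, at most one daemon is removed per scale-down period (derived from cooldownSeconds) + * - a terminating daemon is drained by the StatefulSet preStop hook, which waits for NumExecutorsAvailable == NumExecutors (daemon completely idle) + *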

+ * Cooldown: configurable (default 900s / 15 minutes — scaling down destroys in-memory cache) + */ +public class LlapScaledObjectDependent extends HiveGenericDependentResource { + + /** Largest stabilizationWindowSeconds Kubernetes accepts in an HPA behavior (one hour). */ + private static final int MAX_STABILIZATION_WINDOW_SECONDS = 3600; + + /** Largest periodSeconds Kubernetes accepts in an HPA scaling policy (30 minutes). */ + private static final int MAX_POLICY_PERIOD_SECONDS = 1800; + + /** Registers this dependent for the KEDA keda.sh/v1alpha1 ScaledObject kind. */ + public LlapScaledObjectDependent() { + super(new GroupVersionKind("keda.sh", "v1alpha1", "ScaledObject")); + } + + /** + * Builds the desired ScaledObject targeting the LLAP StatefulSet (cluster name + "-llap"). + * + * @param hiveCluster the primary resource; its llap spec supplies replicas and autoscaling settings + * @param context the reconciliation context (not read here) + * @return a generic keda.sh/v1alpha1 ScaledObject whose "spec" holds the scaling rules + */ + @Override + protected GenericKubernetesResource desired(HiveCluster hiveCluster, + Context<HiveCluster> context) { + AutoscalingSpec autoscaling = hiveCluster.getSpec().llap().autoscaling(); + int maxReplicas = hiveCluster.getSpec().llap().replicas(); + String targetName = hiveCluster.getMetadata().getName() + "-llap"; + + Map<String, Object> spec = new HashMap<>(); + spec.put("scaleTargetRef", Map.of( + "apiVersion", "apps/v1", + "kind", "StatefulSet", + "name", targetName + )); + // KEDA requires idleReplicaCount < minReplicaCount. + // For scale-to-zero: min=1 (minimum when active), idle=0 (scale to zero when idle). + int minReplicaCount = Math.max(1, autoscaling.minReplicas()); + spec.put("minReplicaCount", minReplicaCount); + spec.put("maxReplicaCount", maxReplicas); + if (autoscaling.minReplicas() == 0) { + spec.put("idleReplicaCount", 0); + } + spec.put("cooldownPeriod", autoscaling.cooldownSeconds()); + spec.put("pollingInterval", 5); + + // LLAP scale-up is aggressive: when queries need daemons, scale immediately to max. + // Scale down is slow (1 pod per cooldown) to preserve in-memory cache. + // Kubernetes only accepts stabilizationWindowSeconds in 0..3600 and policy periodSeconds in 1..1800, + // so the configured cooldown is clamped into those ranges (cooldownPeriod above is not limited). + spec.put("advanced", Map.of( + "horizontalPodAutoscalerConfig", Map.of( + "behavior", Map.of( + "scaleDown", Map.of( + "stabilizationWindowSeconds", Math.max(0, Math.min(autoscaling.cooldownSeconds(), MAX_STABILIZATION_WINDOW_SECONDS)), + "policies", List.of(Map.of( + "type", "Pods", + "value", 1, + "periodSeconds", Math.max(1, Math.min(autoscaling.cooldownSeconds(), MAX_POLICY_PERIOD_SECONDS)) + )) + ), + "scaleUp", Map.of( + "stabilizationWindowSeconds", 0, + "policies", List.of(Map.of( + "type", "Pods", + "value", maxReplicas, + "periodSeconds", 15 + )) + ) + ) + ) + )); + + // Triggers: + // 1. Prometheus for NumQueuedRequests — drives proportional scaling.
+ // More queued requests = more LLAP daemons needed. Scales up to max. + // 2. HS2 open sessions — activation only (wake from 0→1). + // Threshold set to maxReplicas so desired = 1/max ≈ 1 (never drives above min). + // activationThreshold=0 ensures any session activates the ScaledObject. + // + // Scale-down: HPA policy removes 1 pod per scale-down period (cooldown, capped at 1800s) to preserve cache. + // Idle (all sessions closed + no queued requests): after cooldownPeriod → 0. + // "or vector(0)" ensures queries return 0 (not empty) when pods don't exist. + String hs2TargetName = hiveCluster.getMetadata().getName() + "-hiveserver2"; + String namespace = hiveCluster.getMetadata().getNamespace(); + spec.put("triggers", List.of( + Map.of( + "type", "prometheus", + "metadata", Map.of( + "serverAddress", "http://prometheus-server.monitoring.svc.cluster.local", + "metricName", "llap_num_queued_requests", + "query", String.format( + "avg(hadoop_llapdaemon_executornumqueuedrequests{namespace=\"%s\",pod=~\"%s-.*\"}) or vector(0)", + namespace, targetName), + "threshold", String.valueOf(autoscaling.scaleUpThreshold()), + "activationThreshold", "0" + ) + ), + Map.of( + "type", "prometheus", + "metadata", Map.of( + "serverAddress", "http://prometheus-server.monitoring.svc.cluster.local", + "metricName", "hs2_open_sessions_activation", + "query", String.format( + "(max(hs2_open_sessions{namespace=\"%s\",pod=~\"%s-.*\"}) > bool 0) or vector(0)", + namespace, hs2TargetName), + "threshold", String.valueOf(maxReplicas), + "activationThreshold", "0" + ) + ) + )); + + return new GenericKubernetesResourceBuilder() + .withApiVersion("keda.sh/v1alpha1") + .withKind("ScaledObject") + .withNewMetadata() + .withName(resourceName(hiveCluster)) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, "llap")) + .endMetadata() + .withAdditionalProperties(Map.of("spec", spec)) + .build(); + } + + /** Returns the ScaledObject name; delegates to {@link #resourceName(HiveCluster)}. */ + @Override + protected String getResourceName(HiveCluster hiveCluster) { +
return resourceName(hiveCluster); + } + + /** Returns the ScaledObject name: the cluster's metadata name plus "-llap-scaledobject". */ + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-llap-scaledobject"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java index c8c044d22ce9..35fcbb8ac171 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java @@ -26,6 +26,8 @@ import io.fabric8.kubernetes.api.model.ContainerPort; import io.fabric8.kubernetes.api.model.ContainerPortBuilder; import io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.Lifecycle; +import io.fabric8.kubernetes.api.model.LifecycleBuilder; import io.fabric8.kubernetes.api.model.Probe; import io.fabric8.kubernetes.api.model.apps.StatefulSet; import io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder; @@ -34,7 +36,9 @@ import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; import org.apache.hive.kubernetes.operator.model.spec.LlapSpec; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; import org.apache.hive.kubernetes.operator.util.Labels; @@ -81,16 +85,15 @@ protected StatefulSet desired(HiveCluster hiveCluster, envVars.addAll(spec.envVars()); } - List ports = List.of( - new ContainerPortBuilder() - .withName("management").withContainerPort(15004).build(), - new
ContainerPortBuilder() - .withName("shuffle").withContainerPort(15551).build(), - new ContainerPortBuilder() - .withName("web").withContainerPort(15002).build(), - new ContainerPortBuilder() - .withName("output").withContainerPort(15003).build() - ); + List ports = new ArrayList<>(); + ports.add(new ContainerPortBuilder() + .withName("management").withContainerPort(15004).build()); + ports.add(new ContainerPortBuilder() + .withName("shuffle").withContainerPort(15551).build()); + ports.add(new ContainerPortBuilder() + .withName("web").withContainerPort(15002).build()); + ports.add(new ContainerPortBuilder() + .withName("output").withContainerPort(15003).build()); Probe readinessProbe = buildTcpProbe(15004, llap.readinessProbe(), 15, 10, 3); @@ -115,11 +118,31 @@ protected StatefulSet desired(HiveCluster hiveCluster, replaceConfMountWithSubPaths(volumeMounts, "llap-config", "llap-daemon-site.xml", "core-site.xml"); + // Add Prometheus JMX Exporter when autoscaling is enabled + AutoscalingSpec autoscaling = llap.autoscaling(); + if (autoscaling.isEnabled()) { + addJmxExporter(spec.image(), COMPONENT, + initContainers, volumeMounts, volumes, envVars, ports); + } + // Pre-compute config hash for the pod template annotation. String configHash = sha256( HadoopXmlBuilder.buildXml(HiveConfigBuilder.getLlapDaemonSite(spec)), HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); + // When autoscaling is enabled and the StatefulSet already exists, preserve the current + // replica count (managed by KEDA/HPA). On initial creation: + // - minReplicas == 0: start at 0, KEDA scales up when hs2_active_sessions > 0 + // - minReplicas > 0: start at configured replicas + boolean autoscalingEnabled = llap.autoscaling() != null && llap.autoscaling().isEnabled(); + Integer replicas = llap.replicas(); + if (autoscalingEnabled) { + int initialReplicas = llap.autoscaling().minReplicas() == 0 ? 
0 : llap.replicas(); + replicas = getSecondaryResource(hiveCluster, context) + .map(s -> s.getSpec().getReplicas()) + .orElse(initialReplicas); + } + StatefulSet statefulSet = new StatefulSetBuilder() .withNewMetadata() .withName(resourceName(hiveCluster)) @@ -127,7 +150,7 @@ protected StatefulSet desired(HiveCluster hiveCluster, .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) .endMetadata() .withNewSpec() - .withReplicas(llap.replicas()) + .withReplicas(replicas) .withServiceName(headlessServiceName) .withNewSelector() .withMatchLabels(selectorLabels) @@ -159,6 +182,57 @@ protected StatefulSet desired(HiveCluster hiveCluster, applySpreadAffinityIfAbsent( statefulSet.getSpec().getTemplate().getSpec(), selectorLabels); + // Graceful scale-down: poll JMX Exporter (port 9404) until all executors idle. + // Uses flat Prometheus text format — same metrics KEDA reads — not brittle JSON parsing. + if (autoscaling.isEnabled()) { + String preStopScript = String.join("\n", + "#!/bin/bash", + "echo '[preStop] Waiting for LLAP executors to become idle (polling localhost:9404/metrics)...'", + "RETRIES=0", + "while true; do", + " RESPONSE=$(curl -sf http://localhost:9404/metrics)", + " if [ $? -ne 0 ]; then", + " RETRIES=$((RETRIES+1))", + " echo \"[preStop] ERROR: JMX Exporter unreachable on port 9404 (attempt $RETRIES)\"", + " if [ $RETRIES -ge 6 ]; then", + " echo '[preStop] JMX Exporter not responding after 60s. Proceeding with shutdown.'", + " break", + " fi", + " sleep 10; continue", + " fi", + " AVAILABLE=$(echo \"$RESPONSE\" | grep '^hadoop_llapdaemon_executornumexecutorsavailable{' | awk '{print $2}')", + " TOTAL=$(echo \"$RESPONSE\" | grep '^hadoop_llapdaemon_executornumexecutors{' | awk '{print $2}')", + " if [ -z \"$AVAILABLE\" ] || [ -z \"$TOTAL\" ]; then", + " echo '[preStop] WARNING: LLAP executor metrics not found. 
JMX Exporter may not be configured.'", + " break", + " fi", + " if [ \"${AVAILABLE%.*}\" -ge \"${TOTAL%.*}\" ] 2>/dev/null; then", + " echo '[preStop] All executors idle. Shutting down.'", + " break", + " fi", + " echo \"[preStop] Executors available=$AVAILABLE / total=$TOTAL — waiting...\"", + " RETRIES=0", + " sleep 10", + "done"); + Lifecycle lifecycle = new LifecycleBuilder() + .withNewPreStop() + .withNewExec() + .withCommand("/bin/bash", "-c", preStopScript) + .endExec() + .endPreStop() + .build(); + statefulSet.getSpec().getTemplate().getSpec().getContainers().get(0).setLifecycle(lifecycle); + statefulSet.getSpec().getTemplate().getSpec() + .setTerminationGracePeriodSeconds((long) autoscaling.gracePeriodSeconds()); + // Prometheus scrape annotations for JMX Exporter metrics endpoint + statefulSet.getSpec().getTemplate().getMetadata().getAnnotations() + .put("prometheus.io/scrape", "true"); + statefulSet.getSpec().getTemplate().getMetadata().getAnnotations() + .put("prometheus.io/port", String.valueOf(ConfigUtils.PROMETHEUS_JMX_EXPORTER_PORT)); + statefulSet.getSpec().getTemplate().getMetadata().getAnnotations() + .put("prometheus.io/path", "/metrics"); + } + if (spec.volumes() != null) { statefulSet.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java index 46a95426c969..e1f88caacb63 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java @@ -26,6 +26,8 @@ import io.fabric8.kubernetes.api.model.ContainerPort; import io.fabric8.kubernetes.api.model.ContainerPortBuilder; import 
io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.Lifecycle; +import io.fabric8.kubernetes.api.model.LifecycleBuilder; import io.fabric8.kubernetes.api.model.Probe; import io.fabric8.kubernetes.api.model.Volume; import io.fabric8.kubernetes.api.model.VolumeMount; @@ -36,6 +38,7 @@ import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; import org.apache.hive.kubernetes.operator.model.spec.DatabaseConfig; import org.apache.hive.kubernetes.operator.util.ConfigUtils; import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; @@ -77,12 +80,11 @@ protected Deployment desired(HiveCluster hiveCluster, ConfigUtils.METASTORE_THRIFT_PORT_KEY, ConfigUtils.METASTORE_THRIFT_PORT_HIVE_KEY, ConfigUtils.METASTORE_THRIFT_PORT_DEFAULT); - List ports = List.of( - new ContainerPortBuilder() - .withName("thrift").withContainerPort(thriftPort).build(), - new ContainerPortBuilder() - .withName("rest").withContainerPort(9001).build() - ); + List ports = new ArrayList<>(); + ports.add(new ContainerPortBuilder() + .withName("thrift").withContainerPort(thriftPort).build()); + ports.add(new ContainerPortBuilder() + .withName("rest").withContainerPort(9001).build()); Probe readinessProbe = buildTcpProbe(thriftPort, spec.metastore().readinessProbe(), 15, 10, 3); Probe livenessProbe = buildTcpProbe(thriftPort, spec.metastore().livenessProbe(), 60, 30, 5); @@ -107,6 +109,13 @@ protected Deployment desired(HiveCluster hiveCluster, replaceConfMountWithSubPaths(volumeMounts, "hive-config", "metastore-site.xml", "core-site.xml"); + // Add Prometheus JMX Exporter when autoscaling is enabled + AutoscalingSpec autoscaling = spec.metastore().autoscaling(); + if (autoscaling.isEnabled()) { + addJmxExporter(spec.image(), COMPONENT, + 
initContainers, volumeMounts, volumes, envVars, ports); + } + // Pre-compute config hash for the pod template annotation. // This ensures the Deployment is created with the correct hash // from the start (single ReplicaSet) and triggers rolling @@ -115,6 +124,19 @@ protected Deployment desired(HiveCluster hiveCluster, HadoopXmlBuilder.buildXml(HiveConfigBuilder.getMetastoreSite(spec)), HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); + // When autoscaling is enabled and the Deployment already exists, preserve the current + // replica count (managed by KEDA/HPA). On initial creation, start at minReplicas + // and let KEDA scale up based on load. + boolean autoscalingEnabled = spec.metastore().autoscaling() != null + && spec.metastore().autoscaling().isEnabled(); + Integer replicas = spec.metastore().replicas(); + if (autoscalingEnabled) { + int initialReplicas = Math.max(1, spec.metastore().autoscaling().minReplicas()); + replicas = getSecondaryResource(hiveCluster, context) + .map(d -> d.getSpec().getReplicas()) + .orElse(initialReplicas); + } + Deployment deployment = new DeploymentBuilder() .withNewMetadata() .withName(resourceName(hiveCluster)) @@ -122,7 +144,7 @@ protected Deployment desired(HiveCluster hiveCluster, .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) .endMetadata() .withNewSpec() - .withReplicas(spec.metastore().replicas()) + .withReplicas(replicas) .withNewSelector() .withMatchLabels(selectorLabels) .endSelector() @@ -155,6 +177,57 @@ protected Deployment desired(HiveCluster hiveCluster, applySpreadAffinityIfAbsent( deployment.getSpec().getTemplate().getSpec(), selectorLabels); + // Graceful scale-down: poll JMX Exporter (port 9404) for open_connections to drain. + // K8s removes the pod from Service Endpoints on termination, so no new requests arrive. + // Uses flat Prometheus text format — same metric KEDA reads — not brittle JSON parsing. 
+ if (autoscaling.isEnabled()) { + String preStopScript = String.join("\n", + "#!/bin/bash", + "echo '[preStop] Waiting for open connections to drain (polling localhost:9404/metrics)...'", + "RETRIES=0", + "while true; do", + " RESPONSE=$(curl -sf http://localhost:9404/metrics)", + " if [ $? -ne 0 ]; then", + " RETRIES=$((RETRIES+1))", + " echo \"[preStop] ERROR: JMX Exporter unreachable on port 9404 (attempt $RETRIES)\"", + " if [ $RETRIES -ge 6 ]; then", + " echo '[preStop] JMX Exporter not responding after 30s. Proceeding with shutdown.'", + " break", + " fi", + " sleep 5; continue", + " fi", + " CONNS=$(echo \"$RESPONSE\" | grep '^hive_metastore_open_connections ' | awk '{print $2}')", + " if [ -z \"$CONNS\" ]; then", + " echo '[preStop] WARNING: hive_metastore_open_connections metric not found. JMX Exporter may not be configured.'", + " break", + " fi", + " if [ \"${CONNS%.*}\" -le 0 ] 2>/dev/null; then", + " echo '[preStop] All connections drained. Shutting down.'", + " break", + " fi", + " echo \"[preStop] hive_metastore_open_connections=$CONNS — waiting...\"", + " RETRIES=0", + " sleep 5", + "done"); + Lifecycle lifecycle = new LifecycleBuilder() + .withNewPreStop() + .withNewExec() + .withCommand("/bin/bash", "-c", preStopScript) + .endExec() + .endPreStop() + .build(); + deployment.getSpec().getTemplate().getSpec().getContainers().get(0).setLifecycle(lifecycle); + deployment.getSpec().getTemplate().getSpec() + .setTerminationGracePeriodSeconds((long) autoscaling.gracePeriodSeconds()); + // Prometheus scrape annotations for JMX Exporter metrics endpoint + deployment.getSpec().getTemplate().getMetadata().getAnnotations() + .put("prometheus.io/scrape", "true"); + deployment.getSpec().getTemplate().getMetadata().getAnnotations() + .put("prometheus.io/port", String.valueOf(ConfigUtils.PROMETHEUS_JMX_EXPORTER_PORT)); + deployment.getSpec().getTemplate().getMetadata().getAnnotations() + .put("prometheus.io/path", "/metrics"); + } + if (spec.volumes() != null) { 
deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastorePdbDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastorePdbDependent.java new file mode 100644 index 000000000000..e177e1e60138 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastorePdbDependent.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import io.fabric8.kubernetes.api.model.IntOrString; +import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudget; +import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudgetBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.CRUDKubernetesDependentResource; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * PodDisruptionBudget for Hive Metastore. 
+ * Ensures at least one Metastore pod remains available during voluntary disruptions + * to prevent catalog access failures. + */ +public class MetastorePdbDependent + extends CRUDKubernetesDependentResource { + /** + * Registers PodDisruptionBudget as the resource type handled by this dependent. + */ + public MetastorePdbDependent() { + super(PodDisruptionBudget.class); + } + /** + * Builds the desired PodDisruptionBudget: named by {@link #resourceName}, placed in the + * HiveCluster's namespace, labelled via Labels.forComponent(hiveCluster, "metastore"), + * with minAvailable=1 and a selector from Labels.selectorForComponent(hiveCluster, "metastore"). + * NOTE(review): with a single running replica, minAvailable=1 leaves no disruption budget, so + * voluntary evictions (e.g. node drain) of that pod would be refused; confirm this is intended. + * + * @param hiveCluster the primary HiveCluster resource + * @param context the reconciliation context (not read here) + * @return the desired PodDisruptionBudget + */ + @Override + protected PodDisruptionBudget desired(HiveCluster hiveCluster, + Context context) { + return new PodDisruptionBudgetBuilder() + .withNewMetadata() + .withName(resourceName(hiveCluster)) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, "metastore")) + .endMetadata() + .withNewSpec() + .withMinAvailable(new IntOrString(1)) + .withNewSelector() + .withMatchLabels(Labels.selectorForComponent(hiveCluster, "metastore")) + .endSelector() + .endSpec() + .build(); + } + /** + * Returns the PodDisruptionBudget name: the HiveCluster's metadata name followed by "-metastore-pdb". + */ + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-metastore-pdb"; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreScaledObjectDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreScaledObjectDependent.java new file mode 100644 index 000000000000..58263318468f --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreScaledObjectDependent.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import io.fabric8.kubernetes.api.model.GenericKubernetesResource; +import io.fabric8.kubernetes.api.model.GenericKubernetesResourceBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.GroupVersionKind; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Manages a KEDA ScaledObject for Hive Metastore autoscaling. + *

+ * Scale-up triggers (OR): + * - Open connections exceed threshold (Prometheus) + * - Pod CPU > 75% + *

+ * Scale-down triggers (AND): + * - CPU < activationValue + * - Open connections at 0 + *

+ * Cooldown: configurable (default 300s / 5 minutes)
+ * Guardrail: replicas should be set based on backend DB max_connections.
+ */
+public class MetastoreScaledObjectDependent extends HiveGenericDependentResource {
+
+  /** Address of the Prometheus server that KEDA queries for the open-connections metric. */
+  private static final String PROMETHEUS_SERVER_ADDRESS =
+      "http://prometheus-server.monitoring.svc.cluster.local";
+
+  /** Binds this dependent to the {@code keda.sh/v1alpha1} {@code ScaledObject} kind. */
+  public MetastoreScaledObjectDependent() {
+    super(new GroupVersionKind("keda.sh", "v1alpha1", "ScaledObject"));
+  }
+
+  /**
+   * Builds the desired ScaledObject that scales the Metastore Deployment.
+   * The replica floor comes from the autoscaling spec, the ceiling from the configured
+   * Metastore replica count. A Prometheus trigger on open connections is always present;
+   * a CPU trigger is added only when the Metastore spec carries a resources block.
+   *
+   * @param hiveCluster the primary resource; supplies names, namespace and Metastore settings
+   * @param context the reconciliation context (not consulted here)
+   * @return the ScaledObject as a generic resource whose {@code spec} is a plain map
+   */
+  @Override
+  protected GenericKubernetesResource desired(HiveCluster hiveCluster,
+      Context<HiveCluster> context) {
+    AutoscalingSpec autoscaling = hiveCluster.getSpec().metastore().autoscaling();
+    int maxReplicas = hiveCluster.getSpec().metastore().replicas();
+    String targetName = hiveCluster.getMetadata().getName() + "-metastore";
+
+    // Threshold = max threads per pod (from metastore-site config or default 1000).
+    // KEDA divides total open_connections by threshold to determine desired replicas.
+    int maxThreads = ConfigUtils.getInt(
+        hiveCluster.getSpec().metastore().configOverrides(),
+        ConfigUtils.METASTORE_SERVER_MAX_THREADS_KEY,
+        ConfigUtils.METASTORE_SERVER_MAX_THREADS_HIVE_KEY,
+        ConfigUtils.METASTORE_SERVER_MAX_THREADS_DEFAULT);
+
+    Map<String, Object> spec = new HashMap<>();
+    spec.put("scaleTargetRef", Map.of(
+        "apiVersion", "apps/v1",
+        "kind", "Deployment",
+        "name", targetName
+    ));
+    spec.put("minReplicaCount", autoscaling.minReplicas());
+    spec.put("maxReplicaCount", maxReplicas);
+    spec.put("cooldownPeriod", autoscaling.cooldownSeconds());
+    spec.put("pollingInterval", 30);
+
+    // Scale down one pod per minute after the cooldown window; scale up by at most
+    // 50% per minute after a two-minute stabilization window.
+    spec.put("advanced", Map.of(
+        "horizontalPodAutoscalerConfig", Map.of(
+            "behavior", Map.of(
+                "scaleDown", Map.of(
+                    "stabilizationWindowSeconds", autoscaling.cooldownSeconds(),
+                    "policies", List.of(Map.of(
+                        "type", "Pods",
+                        "value", 1,
+                        "periodSeconds", 60
+                    ))
+                ),
+                "scaleUp", Map.of(
+                    "stabilizationWindowSeconds", 120,
+                    "policies", List.of(Map.of(
+                        "type", "Percent",
+                        "value", 50,
+                        "periodSeconds", 60
+                    ))
+                )
+            )
+        )
+    ));
+
+    // Triggers: Prometheus for open connections + CPU (only when CPU requests are defined)
+    // "or vector(0)" ensures the query returns 0 (not empty) when no pods match.
+    List<Map<String, Object>> triggers = new ArrayList<>();
+    triggers.add(Map.of(
+        "type", "prometheus",
+        "metadata", Map.of(
+            "serverAddress", PROMETHEUS_SERVER_ADDRESS,
+            "metricName", "hive_metastore_open_connections",
+            "query", String.format(
+                "sum(hive_metastore_open_connections{namespace=\"%s\",pod=~\"%s-.*\"}) or vector(0)",
+                hiveCluster.getMetadata().getNamespace(), targetName),
+            "threshold", String.valueOf(maxThreads),
+            "activationThreshold", "0"
+        )
+    ));
+    // NOTE(review): this guard only checks that a resources block exists, not that it
+    // actually carries a CPU request — confirm against the resources spec type.
+    if (hiveCluster.getSpec().metastore().resources() != null) {
+      // activationValue prevents idle JVM CPU from keeping the ScaledObject active.
+      triggers.add(Map.of(
+          "type", "cpu",
+          "metricType", "Utilization",
+          "metadata", Map.of(
+              "value", "75",
+              "activationValue", String.valueOf(autoscaling.scaleDownThreshold())
+          )
+      ));
+    }
+    spec.put("triggers", triggers);
+
+    return new GenericKubernetesResourceBuilder()
+        .withApiVersion("keda.sh/v1alpha1")
+        .withKind("ScaledObject")
+        .withNewMetadata()
+          .withName(resourceName(hiveCluster))
+          .withNamespace(hiveCluster.getMetadata().getNamespace())
+          .withLabels(Labels.forComponent(hiveCluster, "metastore"))
+        .endMetadata()
+        .withAdditionalProperties(Map.of("spec", spec))
+        .build();
+  }
+
+  /** Returns the same name as {@link #resourceName(HiveCluster)}. */
+  @Override
+  protected String getResourceName(HiveCluster hiveCluster) {
+    return resourceName(hiveCluster);
+  }
+
+  /**
+   * Returns the name of the Metastore ScaledObject for the given cluster.
+   *
+   * @param hiveCluster the primary resource
+   * @return the cluster name suffixed with {@code -metastore-scaledobject}
+   */
+  public static String resourceName(HiveCluster hiveCluster) {
+    return hiveCluster.getMetadata().getName() + "-metastore-scaledobject";
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmPdbDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmPdbDependent.java new file mode 100644 index 000000000000..13fc6343cad0 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmPdbDependent.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import io.fabric8.kubernetes.api.model.IntOrString; +import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudget; +import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudgetBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.CRUDKubernetesDependentResource; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * PodDisruptionBudget for Tez Application Masters. + * Ensures at least one Tez AM remains available in the warm pool during voluntary disruptions. 
+ */
+public class TezAmPdbDependent
+    extends CRUDKubernetesDependentResource<PodDisruptionBudget, HiveCluster> {
+
+  /** Component name used for both the resource labels and the pod selector. */
+  private static final String COMPONENT = "tezam";
+
+  /** Registers this dependent as the manager of {@link PodDisruptionBudget} resources. */
+  public TezAmPdbDependent() {
+    super(PodDisruptionBudget.class);
+  }
+
+  /**
+   * Builds the desired PodDisruptionBudget for the Tez AM pods of the given cluster.
+   *
+   * @param hiveCluster the primary resource; supplies the name, namespace and labels
+   * @param context the reconciliation context (not consulted here)
+   * @return a budget with {@code minAvailable = 1} over the Tez AM selector labels
+   */
+  @Override
+  protected PodDisruptionBudget desired(HiveCluster hiveCluster,
+      Context<HiveCluster> context) {
+    // NOTE(review): with a single Tez AM pod, minAvailable=1 leaves no disruption
+    // budget, so a voluntary eviction (e.g. node drain) of that pod is refused.
+    // Confirm this is the intended behaviour when the pool has one replica.
+    return new PodDisruptionBudgetBuilder()
+        .withNewMetadata()
+          .withName(resourceName(hiveCluster))
+          .withNamespace(hiveCluster.getMetadata().getNamespace())
+          .withLabels(Labels.forComponent(hiveCluster, COMPONENT))
+        .endMetadata()
+        .withNewSpec()
+          .withMinAvailable(new IntOrString(1))
+          .withNewSelector()
+            .withMatchLabels(Labels.selectorForComponent(hiveCluster, COMPONENT))
+          .endSelector()
+        .endSpec()
+        .build();
+  }
+
+  /**
+   * Returns the name of the Tez AM PodDisruptionBudget for the given cluster.
+   *
+   * @param hiveCluster the primary resource
+   * @return the cluster name suffixed with {@code -tezam-pdb}
+   */
+  public static String resourceName(HiveCluster hiveCluster) {
+    return hiveCluster.getMetadata().getName() + "-tezam-pdb";
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmScaledObjectDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmScaledObjectDependent.java new file mode 100644 index 000000000000..731eb6f08d97 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmScaledObjectDependent.java @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import io.fabric8.kubernetes.api.model.GenericKubernetesResource; +import io.fabric8.kubernetes.api.model.GenericKubernetesResourceBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.GroupVersionKind; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Manages a KEDA ScaledObject for Tez Application Master autoscaling. + *

+ * Tez AMs run in a warm pool (StatefulSet). An unclaimed AM sits idle; + * a claimed AM actively orchestrates a query DAG and consumes CPU. + *

+ * Scale-up trigger: + * - Pod CPU > 60% across the StatefulSet (most AMs claimed and working) + *

+ * Scale-down trigger: + * - Pod CPU < 10% (many idle unclaimed AMs) + *

+ * Cooldown: configurable (default 600s / 10 minutes)
+ */
+public class TezAmScaledObjectDependent extends HiveGenericDependentResource {
+
+  /** Address of the Prometheus server that KEDA queries for HS2 / Tez metrics. */
+  private static final String PROMETHEUS_SERVER_ADDRESS =
+      "http://prometheus-server.monitoring.svc.cluster.local";
+
+  /** Binds this dependent to the {@code keda.sh/v1alpha1} {@code ScaledObject} kind. */
+  public TezAmScaledObjectDependent() {
+    super(new GroupVersionKind("keda.sh", "v1alpha1", "ScaledObject"));
+  }
+
+  /**
+   * Builds the desired ScaledObject that scales the Tez AM StatefulSet.
+   * The proportional trigger is CPU utilization when the Tez AM spec carries a resources
+   * block, otherwise the pending-tasks Prometheus metric. In both modes it is followed by
+   * an activation-only trigger on open HS2 sessions.
+   *
+   * @param hiveCluster the primary resource; supplies names, namespace and Tez AM settings
+   * @param context the reconciliation context (not consulted here)
+   * @return the ScaledObject as a generic resource whose {@code spec} is a plain map
+   */
+  @Override
+  protected GenericKubernetesResource desired(HiveCluster hiveCluster,
+      Context<HiveCluster> context) {
+    AutoscalingSpec autoscaling = hiveCluster.getSpec().tezAm().autoscaling();
+    int maxReplicas = hiveCluster.getSpec().tezAm().replicas();
+    String targetName = hiveCluster.getMetadata().getName() + "-tezam";
+
+    Map<String, Object> spec = new HashMap<>();
+    spec.put("scaleTargetRef", Map.of(
+        "apiVersion", "apps/v1",
+        "kind", "StatefulSet",
+        "name", targetName
+    ));
+    // KEDA requires idleReplicaCount < minReplicaCount.
+    // For scale-to-zero: min=1 (minimum when active), idle=0 (scale to zero when idle).
+    // For non-zero min: just set minReplicaCount (no idle needed).
+    int minReplicaCount = Math.max(1, autoscaling.minReplicas());
+    spec.put("minReplicaCount", minReplicaCount);
+    spec.put("maxReplicaCount", maxReplicas);
+    if (autoscaling.minReplicas() == 0) {
+      spec.put("idleReplicaCount", 0);
+    }
+    spec.put("cooldownPeriod", autoscaling.cooldownSeconds());
+    spec.put("pollingInterval", 5);
+
+    // Scale down one pod per minute after the cooldown window; scale up by at most
+    // two pods per 30 seconds after a one-minute stabilization window.
+    spec.put("advanced", Map.of(
+        "horizontalPodAutoscalerConfig", Map.of(
+            "behavior", Map.of(
+                "scaleDown", Map.of(
+                    "stabilizationWindowSeconds", autoscaling.cooldownSeconds(),
+                    "policies", List.of(Map.of(
+                        "type", "Pods",
+                        "value", 1,
+                        "periodSeconds", 60
+                    ))
+                ),
+                "scaleUp", Map.of(
+                    "stabilizationWindowSeconds", 60,
+                    "policies", List.of(Map.of(
+                        "type", "Pods",
+                        "value", 2,
+                        "periodSeconds", 30
+                    ))
+                )
+            )
+        )
+    ));
+
+    // Triggers:
+    // 1. CPU utilization — the primary proportional scaler for warm-pool Tez AMs
+    //    (only included when container has CPU requests defined, required by KEDA)
+    // 2. HS2 cross-component activation: when HS2 has open sessions,
+    //    TezAM should be available (enables wake-from-zero)
+    //
+    // When CPU IS available: CPU drives proportional scaling, HS2 trigger is activation-only
+    // (threshold set to maxReplicas so it never dominates the HPA calculation).
+    // When CPU is NOT available: tez_session_pending_tasks drives proportional scaling
+    // (real query demand — tasks waiting for AM slots), with HS2 sessions for activation only.
+    String hs2TargetName = hiveCluster.getMetadata().getName() + "-hiveserver2";
+    String namespace = hiveCluster.getMetadata().getNamespace();
+    List<Map<String, Object>> triggers = new ArrayList<>();
+    if (hiveCluster.getSpec().tezAm().resources() != null) {
+      // CPU drives proportional scaling; activationValue prevents idle JVM CPU
+      // from keeping the ScaledObject permanently "active" (blocks scale-to-zero).
+      triggers.add(Map.of(
+          "type", "cpu",
+          "metricType", "Utilization",
+          "metadata", Map.of(
+              "value", String.valueOf(autoscaling.scaleUpThreshold()),
+              "activationValue", String.valueOf(autoscaling.scaleDownThreshold())
+          )
+      ));
+    } else {
+      // No CPU available: use tez_session_pending_tasks for proportional scaling.
+      // This metric reflects real query demand (tasks waiting for AM slots), unlike
+      // hs2_open_sessions which includes zombie/idle sessions from ungracefully closed clients.
+      // Threshold: scaleUpThreshold interpreted as pending-tasks-per-AM (default 60 when
+      // using CPU mode, but for pending tasks a lower value like 5-10 is recommended).
+      // "or vector(0)" ensures the query returns 0 when HS2 has no pods.
+      triggers.add(Map.of(
+          "type", "prometheus",
+          "metadata", Map.of(
+              "serverAddress", PROMETHEUS_SERVER_ADDRESS,
+              "metricName", "tez_session_pending_tasks",
+              "query", String.format(
+                  "sum(tez_session_pending_tasks{namespace=\"%s\",pod=~\"%s-.*\"}) or vector(0)",
+                  namespace, hs2TargetName),
+              "threshold", String.valueOf(autoscaling.scaleUpThreshold()),
+              "activationThreshold", "0"
+          )
+      ));
+    }
+    // Both modes end with the same activation-only HS2 trigger, so it is built once
+    // and appended after the proportional trigger (same order as before).
+    triggers.add(hs2ActivationTrigger(namespace, hs2TargetName, maxReplicas));
+    spec.put("triggers", triggers);
+
+    return new GenericKubernetesResourceBuilder()
+        .withApiVersion("keda.sh/v1alpha1")
+        .withKind("ScaledObject")
+        .withNewMetadata()
+          .withName(resourceName(hiveCluster))
+          .withNamespace(hiveCluster.getMetadata().getNamespace())
+          .withLabels(Labels.forComponent(hiveCluster, "tezam"))
+        .endMetadata()
+        .withAdditionalProperties(Map.of("spec", spec))
+        .build();
+  }
+
+  /**
+   * Builds the activation-only Prometheus trigger keyed on open HS2 sessions.
+   * Activation-only: (sessions > bool 0) returns 0 or 1, with threshold=maxReplicas
+   * ensures desired = ceil(1/max) = 1 — never drives replica count above min.
+   * activationThreshold=0 ensures any open session wakes TezAM from zero.
+   * Uses hs2_open_sessions (connection-level) not hs2_active_sessions (query-level).
+   * "or vector(0)" ensures the query returns 0 (not empty) when HS2 has no pods.
+   *
+   * @param namespace namespace label value used in the query
+   * @param hs2TargetName HS2 workload name; pods are matched by this prefix
+   * @param maxReplicas value written as the trigger threshold
+   * @return the trigger definition as a plain map
+   */
+  private static Map<String, Object> hs2ActivationTrigger(
+      String namespace, String hs2TargetName, int maxReplicas) {
+    return Map.of(
+        "type", "prometheus",
+        "metadata", Map.of(
+            "serverAddress", PROMETHEUS_SERVER_ADDRESS,
+            "metricName", "hs2_open_sessions_activation",
+            "query", String.format(
+                "(max(hs2_open_sessions{namespace=\"%s\",pod=~\"%s-.*\"}) > bool 0) or vector(0)",
+                namespace, hs2TargetName),
+            "threshold", String.valueOf(maxReplicas),
+            "activationThreshold", "0"
+        )
+    );
+  }
+
+  /** Returns the same name as {@link #resourceName(HiveCluster)}. */
+  @Override
+  protected String getResourceName(HiveCluster hiveCluster) {
+    return resourceName(hiveCluster);
+  }
+
+  /**
+   * Returns the name of the Tez AM ScaledObject for the given cluster.
+   *
+   * @param hiveCluster the primary resource
+   * @return the cluster name suffixed with {@code -tezam-scaledobject}
+   */
+  public static String resourceName(HiveCluster hiveCluster) {
+    return hiveCluster.getMetadata().getName() + "-tezam-scaledobject";
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java index 5cc7a3f800f3..56e60ca10403 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java @@ -23,7 +23,11 @@ import java.util.Map; import io.fabric8.kubernetes.api.model.Container; +import io.fabric8.kubernetes.api.model.ContainerPort; +import io.fabric8.kubernetes.api.model.ContainerPortBuilder; import io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.Lifecycle; +import io.fabric8.kubernetes.api.model.LifecycleBuilder; import
io.fabric8.kubernetes.api.model.apps.StatefulSet; import io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder; import io.javaoperatorsdk.operator.api.reconciler.Context; @@ -31,6 +35,7 @@ import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; import org.apache.hive.kubernetes.operator.model.spec.TezAmSpec; import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; @@ -107,12 +112,20 @@ protected StatefulSet desired(HiveCluster hiveCluster, .endPersistentVolumeClaim() .build()); + List ports = new ArrayList<>(); List initContainers = new ArrayList<>(); addExternalJars(spec.image(), spec.externalJars(), initContainers, volumeMounts, volumes, envVars); replaceConfMountWithSubPaths(volumeMounts, "hive-config", "hive-site.xml", "tez-site.xml", "core-site.xml"); + // Add Prometheus JMX Exporter when autoscaling is enabled + AutoscalingSpec autoscaling = tezAm.autoscaling(); + if (autoscaling.isEnabled()) { + addJmxExporter(spec.image(), COMPONENT, + initContainers, volumeMounts, volumes, envVars, ports); + } + // Pre-compute config hash for the pod template annotation. // TezAM uses the same ConfigMaps as HS2 (hive-site.xml + tez-site.xml + core-site.xml). String configHash = sha256( @@ -120,6 +133,19 @@ protected StatefulSet desired(HiveCluster hiveCluster, HadoopXmlBuilder.buildXml(HiveConfigBuilder.getTezSite(spec)), HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); + // When autoscaling is enabled and the StatefulSet already exists, preserve the current + // replica count (managed by KEDA/HPA). 
On initial creation: + // - minReplicas == 0: start at 0, KEDA scales up when hs2_open_sessions > 0 + // - minReplicas > 0: start at configured replicas + boolean autoscalingEnabled = tezAm.autoscaling() != null && tezAm.autoscaling().isEnabled(); + Integer replicas = tezAm.replicas(); + if (autoscalingEnabled) { + int initialReplicas = tezAm.autoscaling().minReplicas() == 0 ? 0 : tezAm.replicas(); + replicas = getSecondaryResource(hiveCluster, context) + .map(s -> s.getSpec().getReplicas()) + .orElse(initialReplicas); + } + StatefulSet statefulSet = new StatefulSetBuilder() .withNewMetadata() .withName(resourceName(hiveCluster)) @@ -127,7 +153,7 @@ protected StatefulSet desired(HiveCluster hiveCluster, .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) .endMetadata() .withNewSpec() - .withReplicas(tezAm.replicas()) + .withReplicas(replicas) .withServiceName(headlessServiceName) .withNewSelector() .withMatchLabels(selectorLabels) @@ -145,6 +171,7 @@ protected StatefulSet desired(HiveCluster hiveCluster, .withImage(spec.image()) .withImagePullPolicy(spec.imagePullPolicy()) .withEnv(envVars) + .withPorts(ports) .withResources(buildResources(tezAm.resources())) .withVolumeMounts(volumeMounts) .endContainer() @@ -157,6 +184,50 @@ protected StatefulSet desired(HiveCluster hiveCluster, applySpreadAffinityIfAbsent( statefulSet.getSpec().getTemplate().getSpec(), selectorLabels); + // Graceful scale-down: poll JMX Exporter (port 9404) for DAGsRunning to reach 0. + // K8s removes the pod from Service Endpoints, so HS2 won't assign new DAGs to this AM. + // We read from the same Prometheus-format endpoint that KEDA uses — flat text, not brittle JSON. + if (autoscaling.isEnabled()) { + String preStopScript = String.join("\n", + "#!/bin/bash", + "echo '[preStop] Waiting for active DAGs to complete (polling localhost:9404/metrics)...'", + "RETRIES=0", + "while true; do", + " RESPONSE=$(curl -sf http://localhost:9404/metrics)", + " if [ $? 
-ne 0 ]; then", + " RETRIES=$((RETRIES+1))", + " echo \"[preStop] ERROR: JMX Exporter unreachable on port 9404 (attempt $RETRIES)\"", + " if [ $RETRIES -ge 6 ]; then", + " echo '[preStop] JMX Exporter not responding after 60s. Proceeding with shutdown.'", + " break", + " fi", + " sleep 10; continue", + " fi", + " DAGS=$(echo \"$RESPONSE\" | grep '^tez_am_dagsrunning ' | awk '{print $2}')", + " if [ -z \"$DAGS\" ]; then", + " echo '[preStop] WARNING: tez_am_dagsrunning metric not found. JMX Exporter may not be configured.'", + " break", + " fi", + " if [ \"${DAGS%.*}\" -le 0 ] 2>/dev/null; then", + " echo '[preStop] No active DAGs. Safe to terminate Tez AM.'", + " break", + " fi", + " echo \"[preStop] tez_am_dagsrunning=$DAGS — waiting...\"", + " RETRIES=0", + " sleep 10", + "done"); + Lifecycle lifecycle = new LifecycleBuilder() + .withNewPreStop() + .withNewExec() + .withCommand("/bin/bash", "-c", preStopScript) + .endExec() + .endPreStop() + .build(); + statefulSet.getSpec().getTemplate().getSpec().getContainers().get(0).setLifecycle(lifecycle); + statefulSet.getSpec().getTemplate().getSpec() + .setTerminationGracePeriodSeconds((long) autoscaling.gracePeriodSeconds()); + } + if (spec.volumes() != null) { statefulSet.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2AutoscalingCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2AutoscalingCondition.java new file mode 100644 index 000000000000..bf14dac91e54 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2AutoscalingCondition.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent.condition; + +import io.fabric8.kubernetes.api.model.HasMetadata; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; +import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; +import org.apache.hive.kubernetes.operator.model.HiveCluster; + +/** + * Activation condition for HiveServer2 autoscaling dependent resources. + * Returns true only when spec.hiveServer2.autoscaling.enabled is true. 
+ */
+public class HiveServer2AutoscalingCondition
+    implements Condition<HasMetadata, HiveCluster> {
+
+  /**
+   * Reports whether HiveServer2 autoscaling is switched on for the cluster.
+   *
+   * @param dependentResource the dependent being evaluated (not consulted)
+   * @param primary the HiveCluster whose spec is inspected
+   * @param context the reconciliation context (not consulted)
+   * @return the value of {@code isEnabled()} on the HiveServer2 autoscaling spec
+   */
+  @Override
+  public boolean isMet(
+      DependentResource<HasMetadata, HiveCluster> dependentResource,
+      HiveCluster primary,
+      Context<HiveCluster> context) {
+    return primary.getSpec().hiveServer2().autoscaling().isEnabled();
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2MetricScalingCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2MetricScalingCondition.java new file mode 100644 index 000000000000..9c01942e4a1c --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2MetricScalingCondition.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.dependent.condition;
+
+import io.fabric8.kubernetes.api.model.HasMetadata;
+import io.javaoperatorsdk.operator.api.reconciler.Context;
+import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource;
+import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition;
+import org.apache.hive.kubernetes.operator.model.HiveCluster;
+import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec;
+
+/**
+ * Activation condition for HiveServer2 Prometheus-based ScaledObject.
+ * Returns true when autoscaling is enabled AND minReplicas > 0.
+ * When minReplicas == 0, the HTTPScaledObject is used instead (scale-to-zero).
+ */
+public class HiveServer2MetricScalingCondition
+    implements Condition<HasMetadata, HiveCluster> {
+
+  /**
+   * Reports whether the metric-driven ScaledObject should exist for the cluster.
+   *
+   * @param dependentResource the dependent being evaluated (not consulted)
+   * @param primary the HiveCluster whose spec is inspected
+   * @param context the reconciliation context (not consulted)
+   * @return true when HiveServer2 autoscaling is enabled and its minReplicas is above zero
+   */
+  @Override
+  public boolean isMet(
+      DependentResource<HasMetadata, HiveCluster> dependentResource,
+      HiveCluster primary,
+      Context<HiveCluster> context) {
+    AutoscalingSpec autoscaling = primary.getSpec().hiveServer2().autoscaling();
+    return autoscaling.isEnabled() && autoscaling.minReplicas() > 0;
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2Precondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2Precondition.java index a36002dbf886..81f07269e9c9 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2Precondition.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2Precondition.java @@ -41,7 +41,14 @@ public boolean isMet( return true; } - int desiredReplicas = primary.getSpec().metastore().replicas(); + // When autoscaling is enabled, wait for minReplicas (KEDA manages scaling beyond that). + // Without autoscaling, wait for all configured replicas.
+ int desiredReplicas; + if (primary.getSpec().metastore().autoscaling().isEnabled()) { + desiredReplicas = Math.max(1, primary.getSpec().metastore().autoscaling().minReplicas()); + } else { + desiredReplicas = primary.getSpec().metastore().replicas(); + } return context.getSecondaryResources(Deployment.class).stream() .filter(d -> d.getMetadata().getName().equals(primary.getMetadata().getName() + "-metastore")) .findFirst() diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2ScaleToZeroCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2ScaleToZeroCondition.java new file mode 100644 index 000000000000..7ae91b8f7b8f --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2ScaleToZeroCondition.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hive.kubernetes.operator.dependent.condition;
+
+import io.fabric8.kubernetes.api.model.HasMetadata;
+import io.javaoperatorsdk.operator.api.reconciler.Context;
+import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource;
+import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition;
+import org.apache.hive.kubernetes.operator.model.HiveCluster;
+import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec;
+
+/**
+ * Activation condition for HiveServer2 scale-to-zero (HTTPScaledObject).
+ * Returns true when autoscaling is enabled AND minReplicas == 0.
+ * Requires the KEDA HTTP Add-on to be installed in the cluster.
+ */
+public class HiveServer2ScaleToZeroCondition
+    implements Condition<HasMetadata, HiveCluster> {
+
+  /**
+   * Reports whether the scale-to-zero HTTPScaledObject should exist for the cluster.
+   *
+   * @param dependentResource the dependent being evaluated (not consulted)
+   * @param primary the HiveCluster whose spec is inspected
+   * @param context the reconciliation context (not consulted)
+   * @return true when HiveServer2 autoscaling is enabled and its minReplicas equals zero
+   */
+  @Override
+  public boolean isMet(
+      DependentResource<HasMetadata, HiveCluster> dependentResource,
+      HiveCluster primary,
+      Context<HiveCluster> context) {
+    AutoscalingSpec autoscaling = primary.getSpec().hiveServer2().autoscaling();
+    return autoscaling.isEnabled() && autoscaling.minReplicas() == 0;
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapAutoscalingCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapAutoscalingCondition.java new file mode 100644 index 000000000000..f4e097786b08 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapAutoscalingCondition.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.dependent.condition;
+
+import io.fabric8.kubernetes.api.model.HasMetadata;
+import io.javaoperatorsdk.operator.api.reconciler.Context;
+import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource;
+import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition;
+import org.apache.hive.kubernetes.operator.model.HiveCluster;
+
+/**
+ * Activation condition for LLAP autoscaling dependent resources.
+ * Returns true only when spec.llap.enabled is true and spec.llap.autoscaling.enabled is true.
+ */
+public class LlapAutoscalingCondition
+    implements Condition<HasMetadata, HiveCluster> {
+
+  /**
+   * Reports whether LLAP autoscaling resources should exist for the cluster.
+   *
+   * @param dependentResource the dependent being evaluated (not consulted)
+   * @param primary the HiveCluster whose spec is inspected
+   * @param context the reconciliation context (not consulted)
+   * @return true when both LLAP itself and LLAP autoscaling report enabled
+   */
+  @Override
+  public boolean isMet(
+      DependentResource<HasMetadata, HiveCluster> dependentResource,
+      HiveCluster primary,
+      Context<HiveCluster> context) {
+    // LLAP is checked first: autoscaling settings are irrelevant when LLAP is off.
+    return primary.getSpec().llap().isEnabled()
+        && primary.getSpec().llap().autoscaling().isEnabled();
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreAutoscalingCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreAutoscalingCondition.java new file mode 100644 index 000000000000..a0ac83d8a423 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreAutoscalingCondition.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent.condition; + +import io.fabric8.kubernetes.api.model.HasMetadata; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; +import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; +import org.apache.hive.kubernetes.operator.model.HiveCluster; + +/** + * Activation condition for Metastore autoscaling dependent resources. + * Returns true only when spec.metastore.autoscaling.enabled is true and metastore is managed. 
+ */ +public class MetastoreAutoscalingCondition + implements Condition { + + @Override + public boolean isMet( + DependentResource dependentResource, + HiveCluster primary, + Context context) { + return primary.getSpec().metastore().isEnabled() + && primary.getSpec().metastore().autoscaling().isEnabled(); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java index 7b3169f32043..58885c6e8865 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java @@ -39,7 +39,14 @@ public boolean isMet( if (!primary.getSpec().metastore().isEnabled()) { return true; } - int desiredReplicas = primary.getSpec().metastore().replicas(); + // When autoscaling is enabled, wait for minReplicas (KEDA manages scaling beyond that). + // Without autoscaling, wait for all configured replicas. 
+ int desiredReplicas; + if (primary.getSpec().metastore().autoscaling().isEnabled()) { + desiredReplicas = Math.max(1, primary.getSpec().metastore().autoscaling().minReplicas()); + } else { + desiredReplicas = primary.getSpec().metastore().replicas(); + } return dependentResource.getSecondaryResource(primary, context) .map(deployment -> deployment.getStatus() != null && deployment.getStatus().getReadyReplicas() != null diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmAutoscalingCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmAutoscalingCondition.java new file mode 100644 index 000000000000..a2ed23cbbadc --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmAutoscalingCondition.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.dependent.condition; + +import io.fabric8.kubernetes.api.model.HasMetadata; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; +import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; +import org.apache.hive.kubernetes.operator.model.HiveCluster; + +/** + * Activation condition for Tez AM autoscaling dependent resources. + * Returns true only when spec.tezAm.enabled is true and spec.tezAm.autoscaling.enabled is true. + */ +public class TezAmAutoscalingCondition + implements Condition { + + @Override + public boolean isMet( + DependentResource dependentResource, + HiveCluster primary, + Context context) { + return primary.getSpec().tezAm().isEnabled() + && primary.getSpec().tezAm().autoscaling().isEnabled(); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/AutoscalingSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/AutoscalingSpec.java new file mode 100644 index 000000000000..eb0980fb1a1d --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/AutoscalingSpec.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.fabric8.generator.annotation.Default; + +/** Autoscaling configuration for a Hive component. Uses KEDA ScaledObjects for metric-based scaling. */ +public record AutoscalingSpec( + @JsonPropertyDescription("Whether autoscaling is enabled for this component") + @Default("false") + Boolean enabled, + @JsonPropertyDescription("Minimum number of replicas (floor for scale-down). " + + "Set to 0 for scale-to-zero (HS2 requires KEDA HTTP Add-on for wake-from-zero)") + @Default("0") + Integer minReplicas, + @JsonPropertyDescription("Threshold that triggers scale-up (component-specific: " + + "sessions for HS2, queue depth for LLAP, CPU% for TezAM with resources, " + + "pending tasks per AM for TezAM without resources)") + @Default("80") + Integer scaleUpThreshold, + @JsonPropertyDescription("Percentage threshold that triggers scale-down " + + "(all conditions must be met: metric below threshold AND CPU below threshold)") + @Default("20") + Integer scaleDownThreshold, + @JsonPropertyDescription("Cooldown period in seconds after a scaling event before another can occur") + @Default("600") + Integer cooldownSeconds, + @JsonPropertyDescription("Maximum time in seconds to wait for graceful drain " + + "during scale-down before the pod is forcibly terminated") + @Default("300") + Integer gracePeriodSeconds) { + + public AutoscalingSpec { + enabled = enabled != null ? enabled : false; + minReplicas = minReplicas != null ? 
minReplicas : 0; + scaleUpThreshold = scaleUpThreshold != null ? scaleUpThreshold : 80; + scaleDownThreshold = scaleDownThreshold != null ? scaleDownThreshold : 20; + cooldownSeconds = cooldownSeconds != null ? cooldownSeconds : 600; + gracePeriodSeconds = gracePeriodSeconds != null ? gracePeriodSeconds : 300; + } + + public boolean isEnabled() { + return enabled; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java index 78164fb32de6..993b452ba4b4 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java @@ -51,7 +51,9 @@ public record HiveServer2Spec( @JsonPropertyDescription("Readiness probe configuration") ProbeSpec readinessProbe, @JsonPropertyDescription("Liveness probe configuration") - ProbeSpec livenessProbe) { + ProbeSpec livenessProbe, + @JsonPropertyDescription("Autoscaling configuration (requires KEDA installed in the cluster)") + AutoscalingSpec autoscaling) { public HiveServer2Spec { replicas = replicas != null ? replicas : 1; @@ -59,5 +61,7 @@ public record HiveServer2Spec( extraVolumes = extraVolumes != null ? extraVolumes : List.of(); extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); externalJars = externalJars != null ? externalJars : List.of(); + autoscaling = autoscaling != null ? 
autoscaling : new AutoscalingSpec( + false, 0, 80, 20, 600, 300); } } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java index 17ff5967ff9a..34cfc872f189 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java @@ -55,7 +55,9 @@ public record LlapSpec( @JsonPropertyDescription("LLAP service hosts identifier for ZooKeeper registration") String serviceHosts, @JsonPropertyDescription("Readiness probe configuration") - ProbeSpec readinessProbe) { + ProbeSpec readinessProbe, + @JsonPropertyDescription("Autoscaling configuration (requires KEDA installed in the cluster)") + AutoscalingSpec autoscaling) { public LlapSpec { replicas = replicas != null ? replicas : 1; @@ -65,6 +67,8 @@ public record LlapSpec( serviceHosts = serviceHosts != null ? serviceHosts : "@llap0"; extraVolumes = extraVolumes != null ? extraVolumes : List.of(); extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); + autoscaling = autoscaling != null ? 
autoscaling : new AutoscalingSpec( + false, 0, 1, 0, 900, 600); } public boolean isEnabled() { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java index 307c17221ee7..51dd6dea7259 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java @@ -56,7 +56,9 @@ public record MetastoreSpec( @JsonPropertyDescription("Readiness probe configuration") ProbeSpec readinessProbe, @JsonPropertyDescription("Liveness probe configuration") - ProbeSpec livenessProbe) { + ProbeSpec livenessProbe, + @JsonPropertyDescription("Autoscaling configuration (requires KEDA installed in the cluster)") + AutoscalingSpec autoscaling) { public MetastoreSpec { replicas = replicas != null ? replicas : 1; @@ -66,6 +68,8 @@ public record MetastoreSpec( enabled = enabled != null ? enabled : true; extraVolumes = extraVolumes != null ? extraVolumes : List.of(); extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); + autoscaling = autoscaling != null ? autoscaling : new AutoscalingSpec( + false, 1, 75, 30, 300, 60); } public boolean isEnabled() { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java index a0494c2c5e73..c973145b0080 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java @@ -52,7 +52,9 @@ public record TezAmSpec( String scratchStorageSize, @JsonPropertyDescription("StorageClass for the shared scratch PVC. " + "Must support ReadWriteMany access. 
If null, uses cluster default.") - String scratchStorageClassName) { + String scratchStorageClassName, + @JsonPropertyDescription("Autoscaling configuration (requires KEDA installed in the cluster)") + AutoscalingSpec autoscaling) { public TezAmSpec { replicas = replicas != null ? replicas : 1; @@ -60,6 +62,8 @@ public record TezAmSpec( scratchStorageSize = scratchStorageSize != null ? scratchStorageSize : "1Gi"; extraVolumes = extraVolumes != null ? extraVolumes : List.of(); extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); + autoscaling = autoscaling != null ? autoscaling : new AutoscalingSpec( + false, 0, 60, 10, 600, 120); } public boolean isEnabled() { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java index 20332cb4127c..e621f7065a54 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java @@ -40,22 +40,37 @@ import org.apache.hive.kubernetes.operator.dependent.HadoopConfigMapDependent; import org.apache.hive.kubernetes.operator.dependent.HiveServer2ConfigMapDependent; import org.apache.hive.kubernetes.operator.dependent.HiveServer2DeploymentDependent; +import org.apache.hive.kubernetes.operator.dependent.HiveServer2PdbDependent; +import org.apache.hive.kubernetes.operator.dependent.HiveServer2HttpScaledObjectDependent; +import org.apache.hive.kubernetes.operator.dependent.HiveServer2ScaledObjectDependent; import org.apache.hive.kubernetes.operator.dependent.HiveServer2ServiceDependent; import org.apache.hive.kubernetes.operator.dependent.LlapConfigMapDependent; +import org.apache.hive.kubernetes.operator.dependent.LlapPdbDependent; +import 
org.apache.hive.kubernetes.operator.dependent.LlapScaledObjectDependent; import org.apache.hive.kubernetes.operator.dependent.LlapServiceDependent; import org.apache.hive.kubernetes.operator.dependent.LlapStatefulSetDependent; import org.apache.hive.kubernetes.operator.dependent.MetastoreConfigMapDependent; import org.apache.hive.kubernetes.operator.dependent.MetastoreDeploymentDependent; +import org.apache.hive.kubernetes.operator.dependent.MetastorePdbDependent; +import org.apache.hive.kubernetes.operator.dependent.MetastoreScaledObjectDependent; import org.apache.hive.kubernetes.operator.dependent.MetastoreServiceDependent; import org.apache.hive.kubernetes.operator.dependent.SchemaInitJobDependent; import org.apache.hive.kubernetes.operator.dependent.ScratchPvcDependent; +import org.apache.hive.kubernetes.operator.dependent.TezAmPdbDependent; +import org.apache.hive.kubernetes.operator.dependent.TezAmScaledObjectDependent; import org.apache.hive.kubernetes.operator.dependent.TezAmServiceDependent; import org.apache.hive.kubernetes.operator.dependent.TezAmStatefulSetDependent; +import org.apache.hive.kubernetes.operator.dependent.condition.HiveServer2AutoscalingCondition; +import org.apache.hive.kubernetes.operator.dependent.condition.HiveServer2MetricScalingCondition; import org.apache.hive.kubernetes.operator.dependent.condition.HiveServer2Precondition; +import org.apache.hive.kubernetes.operator.dependent.condition.HiveServer2ScaleToZeroCondition; +import org.apache.hive.kubernetes.operator.dependent.condition.LlapAutoscalingCondition; import org.apache.hive.kubernetes.operator.dependent.condition.LlapEnabledCondition; +import org.apache.hive.kubernetes.operator.dependent.condition.MetastoreAutoscalingCondition; import org.apache.hive.kubernetes.operator.dependent.condition.MetastoreEnabledCondition; import org.apache.hive.kubernetes.operator.dependent.condition.MetastoreReadyCondition; import 
org.apache.hive.kubernetes.operator.dependent.condition.SchemaJobCompletedCondition; +import org.apache.hive.kubernetes.operator.dependent.condition.TezAmAutoscalingCondition; import org.apache.hive.kubernetes.operator.dependent.condition.TezAmEnabledCondition; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.model.HiveClusterStatus; @@ -102,7 +117,27 @@ @Dependent(name = "tezam-service", type = TezAmServiceDependent.class, activationCondition = TezAmEnabledCondition.class), @Dependent(name = "tezam-statefulset", type = TezAmStatefulSetDependent.class, dependsOn = {"hiveserver2-configmap", - "hadoop-configmap", "tezam-service", "scratch-pvc"}, activationCondition = TezAmEnabledCondition.class)}) + "hadoop-configmap", "tezam-service", "scratch-pvc"}, activationCondition = TezAmEnabledCondition.class), + // --- Autoscaling: KEDA ScaledObjects (conditional) --- + @Dependent(name = "hs2-scaledobject", type = HiveServer2ScaledObjectDependent.class, dependsOn = { + "hiveserver2-deployment"}, activationCondition = HiveServer2MetricScalingCondition.class), + @Dependent(name = "hs2-httpso", type = HiveServer2HttpScaledObjectDependent.class, dependsOn = { + "hiveserver2-deployment"}, activationCondition = HiveServer2ScaleToZeroCondition.class), + @Dependent(name = "metastore-scaledobject", type = MetastoreScaledObjectDependent.class, dependsOn = { + "metastore-deployment"}, activationCondition = MetastoreAutoscalingCondition.class), + @Dependent(name = "llap-scaledobject", type = LlapScaledObjectDependent.class, dependsOn = { + "llap-statefulset"}, activationCondition = LlapAutoscalingCondition.class), + @Dependent(name = "tezam-scaledobject", type = TezAmScaledObjectDependent.class, dependsOn = { + "tezam-statefulset"}, activationCondition = TezAmAutoscalingCondition.class), + // --- Autoscaling: PodDisruptionBudgets (conditional) --- + @Dependent(name = "hs2-pdb", type = HiveServer2PdbDependent.class, dependsOn = { + 
"hiveserver2-deployment"}, activationCondition = HiveServer2AutoscalingCondition.class), + @Dependent(name = "metastore-pdb", type = MetastorePdbDependent.class, dependsOn = { + "metastore-deployment"}, activationCondition = MetastoreAutoscalingCondition.class), + @Dependent(name = "llap-pdb", type = LlapPdbDependent.class, dependsOn = { + "llap-statefulset"}, activationCondition = LlapAutoscalingCondition.class), + @Dependent(name = "tezam-pdb", type = TezAmPdbDependent.class, dependsOn = { + "tezam-statefulset"}, activationCondition = TezAmAutoscalingCondition.class)}) public class HiveClusterReconciler implements Reconciler { private static final Logger LOG = LoggerFactory.getLogger(HiveClusterReconciler.class); @@ -172,9 +207,13 @@ private HiveClusterStatus buildStatus(HiveCluster resource, // Metastore status boolean metastoreReady; if (resource.getSpec().metastore().isEnabled()) { + // When autoscaling, desired = minReplicas (KEDA manages beyond that) + int metastoreDesired = resource.getSpec().metastore().autoscaling().isEnabled() + ? Math.max(1, resource.getSpec().metastore().autoscaling().minReplicas()) + : resource.getSpec().metastore().replicas(); ComponentStatus metastoreStatus = buildComponentStatus(context, Deployment.class, resource.getMetadata().getName() + "-metastore", - resource.getSpec().metastore().replicas(), + metastoreDesired, d -> d.getStatus() != null && d.getStatus().getReadyReplicas() != null ? d.getStatus().getReadyReplicas() : 0); @@ -192,15 +231,17 @@ private HiveClusterStatus buildStatus(HiveCluster resource, existingConditions)); } - // HiveServer2 status + // HiveServer2 status — when scale-to-zero, 0/0 is a valid "ready" state (idle) + int hs2Desired = resource.getSpec().hiveServer2().autoscaling().isEnabled() + ? 
resource.getSpec().hiveServer2().autoscaling().minReplicas() + : resource.getSpec().hiveServer2().replicas(); ComponentStatus hs2Status = buildComponentStatus(context, Deployment.class, resource.getMetadata().getName() + "-hiveserver2", - resource.getSpec().hiveServer2().replicas(), + hs2Desired, d -> d.getStatus() != null && d.getStatus().getReadyReplicas() != null ? d.getStatus().getReadyReplicas() : 0); status.setHiveServer2(hs2Status); - boolean hs2Ready = - hs2Status.getReadyReplicas() >= hs2Status.getDesiredReplicas() && hs2Status.getDesiredReplicas() > 0; + boolean hs2Ready = hs2Status.getReadyReplicas() >= hs2Status.getDesiredReplicas(); conditions.add(buildCondition("HiveServer2Ready", hs2Ready ? "True" : "False", hs2Ready ? "DeploymentReady" : "DeploymentNotReady", hs2Ready ? "HiveServer2 is ready" : "HiveServer2 not yet ready", @@ -208,17 +249,23 @@ private HiveClusterStatus buildStatus(HiveCluster resource, // LLAP status (optional) if (resource.getSpec().llap().isEnabled()) { + int llapDesired = resource.getSpec().llap().autoscaling().isEnabled() + ? resource.getSpec().llap().autoscaling().minReplicas() + : resource.getSpec().llap().replicas(); status.setLlap(buildComponentStatus(context, StatefulSet.class, resource.getMetadata().getName() + "-llap", - resource.getSpec().llap().replicas(), + llapDesired, s -> s.getStatus() != null && s.getStatus().getReadyReplicas() != null ? s.getStatus().getReadyReplicas() : 0)); } // TezAM status (optional) if (resource.getSpec().tezAm().isEnabled()) { + int tezAmDesired = resource.getSpec().tezAm().autoscaling().isEnabled() + ? resource.getSpec().tezAm().autoscaling().minReplicas() + : resource.getSpec().tezAm().replicas(); status.setTezAm(buildComponentStatus(context, StatefulSet.class, resource.getMetadata().getName() + "-tezam", - resource.getSpec().tezAm().replicas(), + tezAmDesired, s -> s.getStatus() != null && s.getStatus().getReadyReplicas() != null ? 
s.getStatus().getReadyReplicas() : 0)); } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/ConfigUtils.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/ConfigUtils.java index 0f86201817e7..2e641cdb4c1c 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/ConfigUtils.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/ConfigUtils.java @@ -73,14 +73,34 @@ private ConfigUtils() { public static final String HIVE_LLAP_DAEMON_NUM_EXECUTORS_KEY = "hive.llap.daemon.num.executors"; + public static final String METASTORE_SERVER_MAX_THREADS_KEY = "metastore.server.max.threads"; + public static final String METASTORE_SERVER_MAX_THREADS_HIVE_KEY = "hive.metastore.server.max.threads"; + public static final int METASTORE_SERVER_MAX_THREADS_DEFAULT = 1000; + public static final String HIVE_METASTORE_URIS_KEY = "hive.metastore.uris"; public static final String HIVE_SERVER2_THRIFT_PORT_KEY = "hive.server2.thrift.port"; public static final int HIVE_SERVER2_THRIFT_PORT_DEFAULT = 10000; + public static final String HIVE_SERVER2_THRIFT_HTTP_PORT_KEY = "hive.server2.thrift.http.port"; + public static final int HIVE_SERVER2_THRIFT_HTTP_PORT_DEFAULT = 10001; + + public static final String HIVE_SERVER2_THRIFT_HTTP_PATH_KEY = "hive.server2.thrift.http.path"; + public static final String HIVE_SERVER2_THRIFT_HTTP_PATH_DEFAULT = "cliservice"; + + public static final String HIVE_SERVER2_TRANSPORT_MODE_KEY = "hive.server2.transport.mode"; + public static final String HIVE_SERVER2_TRANSPORT_MODE_DEFAULT = "http"; + public static final String HIVE_SERVER2_WEBUI_PORT_KEY = "hive.server2.webui.port"; public static final int HIVE_SERVER2_WEBUI_PORT_DEFAULT = 10002; + /** Port for the Prometheus JMX Exporter agent (serves /metrics in text format). 
*/ + public static final int PROMETHEUS_JMX_EXPORTER_PORT = 9404; + + /** Default URL for the Prometheus JMX Exporter javaagent JAR. */ + public static final String JMX_EXPORTER_JAR_URL = + "https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/1.0.1/jmx_prometheus_javaagent-1.0.1.jar"; + public static final String TEZ_AM_SESSION_MODE_KEY = "tez.am.mode.session"; public static final String TEZ_IGNORE_LIB_URIS_KEY = "tez.ignore.lib.uris"; diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HiveConfigBuilder.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HiveConfigBuilder.java index 5db24e95d3f3..7baaf2afc34f 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HiveConfigBuilder.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HiveConfigBuilder.java @@ -60,6 +60,12 @@ public static Map getHiveServer2HiveSite( props.put(ConfigUtils.HIVE_METASTORE_WAREHOUSE_KEY, spec.metastore().warehouseDir()); props.put(ConfigUtils.HIVE_SERVER2_ENABLE_DOAS_KEY, "false"); + props.put(ConfigUtils.HIVE_SERVER2_TRANSPORT_MODE_KEY, + ConfigUtils.HIVE_SERVER2_TRANSPORT_MODE_DEFAULT); + props.put(ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_KEY, + String.valueOf(ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_DEFAULT)); + props.put(ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PATH_KEY, + ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PATH_DEFAULT); props.put(ConfigUtils.HIVE_TEZ_EXEC_INPLACE_PROGRESS_KEY, "false"); props.put(ConfigUtils.HIVE_TEZ_EXEC_SUMMARY_KEY, "true"); props.put(ConfigUtils.HIVE_JAR_DIRECTORY_KEY, "/tmp"); @@ -95,6 +101,14 @@ public static Map getHiveServer2HiveSite( props.put("mapreduce.framework.name", "local"); } + // Enable JMX metrics when autoscaling is active. + // The Prometheus JMX Exporter agent (added by the operator) reads JMX MBeans + // and exposes them in Prometheus text format at /metrics on the metrics port. 
+ if (spec.hiveServer2().autoscaling().isEnabled()) { + props.put("hive.server2.metrics.enabled", "true"); + props.put("hive.server2.metrics.reporter", "JMX"); + } + if (spec.hiveServer2().configOverrides() != null) { props.putAll(spec.hiveServer2().configOverrides()); } @@ -165,6 +179,14 @@ public static Map getMetastoreSite(HiveClusterSpec spec) { } } + // Enable JMX metrics when autoscaling is active. + // The Prometheus JMX Exporter agent reads JMX MBeans and exposes them + // in Prometheus text format at /metrics on the metrics port. + if (metastore.autoscaling().isEnabled()) { + props.put("metastore.metrics.enabled", "true"); + props.put("metastore.metrics.reporter", "JMX"); + } + if (metastore.configOverrides() != null) { props.putAll(metastore.configOverrides()); } From 930af89b2596d334f110a208ab3eb629fa8324e0 Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Tue, 26 May 2026 20:21:59 +0530 Subject: [PATCH 2/4] Fix Scaling HMS & Refactor --- packaging/src/kubernetes/README.md | 247 ++++++------ .../hive-operator/templates/clusterrole.yaml | 2 +- .../kubernetes/operator/HiveOperatorMain.java | 15 +- .../dependent/HadoopConfigMapDependent.java | 67 ---- .../dependent/HiveConfigMapDependent.java | 153 ++++++++ .../dependent/HiveDependentResource.java | 279 ++++++++++++-- .../HiveGenericDependentResource.java | 88 +++++ .../operator/dependent/HivePdbDependent.java | 103 +++++ .../dependent/HiveScaledObjectDependent.java | 360 ++++++++++++++++++ .../HiveServer2ConfigMapDependent.java | 72 ---- .../HiveServer2DeploymentDependent.java | 111 ++---- .../HiveServer2HttpScaledObjectDependent.java | 5 +- .../HiveServer2InterceptorRouteDependent.java | 116 ++++++ .../dependent/HiveServer2PdbDependent.java | 62 --- .../HiveServer2ScaledObjectDependent.java | 149 -------- .../HiveServer2ServiceDependent.java | 87 ----- .../dependent/HiveServiceDependent.java | 165 ++++++++ .../dependent/LlapConfigMapDependent.java | 68 ---- .../operator/dependent/LlapPdbDependent.java | 
62 --- .../dependent/LlapScaledObjectDependent.java | 158 -------- .../dependent/LlapServiceDependent.java | 77 ---- .../dependent/LlapStatefulSetDependent.java | 115 ++---- .../MetastoreConfigMapDependent.java | 67 ---- .../MetastoreDeploymentDependent.java | 103 ++--- .../dependent/MetastorePdbDependent.java | 62 --- .../MetastoreScaledObjectDependent.java | 152 -------- .../dependent/MetastoreServiceDependent.java | 75 ---- .../dependent/SchemaInitJobDependent.java | 6 + .../dependent/ScratchPvcDependent.java | 6 + .../operator/dependent/TezAmPdbDependent.java | 61 --- .../dependent/TezAmScaledObjectDependent.java | 200 ---------- .../dependent/TezAmServiceDependent.java | 62 --- .../dependent/TezAmStatefulSetDependent.java | 97 ++--- .../HiveServer2AutoscalingCondition.java | 41 -- .../HiveServer2MetricScalingCondition.java | 44 --- .../condition/HiveServer2Precondition.java | 60 --- .../HiveServer2ScaleToZeroCondition.java | 44 --- .../condition/LlapAutoscalingCondition.java | 42 -- .../condition/LlapEnabledCondition.java | 41 -- .../MetastoreAutoscalingCondition.java | 42 -- .../condition/MetastoreEnabledCondition.java | 39 -- .../condition/MetastoreReadyCondition.java | 56 --- .../SchemaJobCompletedCondition.java | 48 --- .../condition/TezAmAutoscalingCondition.java | 42 -- .../condition/TezAmEnabledCondition.java | 41 -- .../operator/model/HiveClusterSpec.java | 8 + .../operator/model/spec/TezAmSpec.java | 2 +- .../reconciler/HiveClusterReconciler.java | 182 ++++----- .../operator/reconciler/HiveWorkflowSpec.java | 290 ++++++++++++++ ...torMapJoinOuterGenerateResultOperator.java | 1 - 50 files changed, 1893 insertions(+), 2582 deletions(-) delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveConfigMapDependent.java create mode 100644 
packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HivePdbDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveScaledObjectDependent.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2InterceptorRouteDependent.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2PdbDependent.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ScaledObjectDependent.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServiceDependent.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapPdbDependent.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapScaledObjectDependent.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastorePdbDependent.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreScaledObjectDependent.java delete mode 100644 
packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreServiceDependent.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmPdbDependent.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmScaledObjectDependent.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmServiceDependent.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2AutoscalingCondition.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2MetricScalingCondition.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2Precondition.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2ScaleToZeroCondition.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapAutoscalingCondition.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapEnabledCondition.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreAutoscalingCondition.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreEnabledCondition.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/SchemaJobCompletedCondition.java delete mode 100644 
packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmAutoscalingCondition.java delete mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmEnabledCondition.java create mode 100644 packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveWorkflowSpec.java diff --git a/packaging/src/kubernetes/README.md b/packaging/src/kubernetes/README.md index e9ccc62d949a..ac2a4d0e9584 100644 --- a/packaging/src/kubernetes/README.md +++ b/packaging/src/kubernetes/README.md @@ -555,9 +555,11 @@ kubectl get pods -n keda -l app=keda-add-ons-http-interceptor-proxy # Expected: keda-add-ons-http-interceptor-proxy-... Running ``` -> **Note:** The HTTP Add-on is required when `minReplicas: 0`. It places an interceptor -> proxy in the traffic path that detects incoming requests when HS2 has zero pods, -> automatically scaling HS2 up and holding the request until a pod is ready. +> **Note:** The HTTP Add-on is required when `minReplicas: 0`. The operator creates +> an `InterceptorRoute` resource that configures the interceptor proxy to route traffic +> to HS2. When HS2 has zero pods, the interceptor holds incoming requests and triggers +> scale-up via an `external-push` trigger on the HS2 ScaledObject. The first request +> takes ~30-60s while the pod starts. 
**For Prometheus-based triggers** (HS2, HMS, LLAP), install Prometheus: @@ -597,21 +599,25 @@ helm install prometheus prometheus-community/prometheus \ | Component | Scale-Up Trigger | Scale-Down Trigger | Cooldown | Native Metric | |-----------|-----------------|-------------------|----------|---------------| -| **HiveServer2** | `hs2_active_sessions` > 80% of max **OR** CPU > 75% | `hs2_open_sessions` < 20% **AND** CPU < 30% | 10 min | `hs2_open_sessions`, `hs2_active_sessions` | -| **Metastore** | `api_get_partitions` rate spike **OR** CPU > 75% (2 min) | CPU < 30% **AND** API rate flat | 5 min | `api_get_partitions`, `open_connections` | -| **LLAP** | `NumQueuedRequests` > 0 for 1 min | `NumExecutorsAvailable == NumExecutors` (idle) | 15 min | `NumQueuedRequests`, `NumExecutorsAvailable` | -| **Tez AM** (with CPU resources) | Pod CPU > 60% (pool is busy) | Pod CPU < 10% (pool is idle) | 10 min | Standard K8s CPU | -| **Tez AM** (without CPU resources) | `tez_session_pending_tasks` > threshold | No pending tasks for cooldown | 10 min | `tez_session_pending_tasks` | +| **HiveServer2** | `hs2_open_sessions` > scaleUpThreshold **OR** CPU > 75% | Sessions below threshold **AND** CPU below scaleDownThreshold | 5 min | `hs2_open_sessions` | +| **Metastore** | `hive_metastore_open_connections` > scaleUpThreshold **OR** CPU > 75% | Connections below threshold **AND** CPU below scaleDownThreshold | 5 min | `hive_metastore_open_connections` | +| **LLAP** | Total busy slots > scaleUpThreshold (queued + busy executors) | All executors idle + no HS2 sessions | 15 min | `NumQueuedRequests`, `NumExecutorsConfigured`, `NumExecutorsAvailable` | +| **Tez AM** (with CPU resources) | Pod CPU > scaleUpThreshold% | Pod CPU < scaleDownThreshold% + no HS2 sessions | 5 min | Standard K8s CPU | +| **Tez AM** (without CPU resources) | `tez_session_pending_tasks` > scaleUpThreshold | No pending tasks + no HS2 sessions | 5 min | `tez_session_pending_tasks` | ### Scale-to-Zero Architecture 
When `minReplicas: 0` is configured (default for HS2, LLAP, TezAM), the cluster -scales down to zero pods when completely idle: +scales down to zero pods when completely idle. The operator uses a **unified +ScaledObject + InterceptorRoute** architecture — a single KEDA ScaledObject per +component handles both Prometheus-based scaling and wake-from-zero, while an +`InterceptorRoute` (from the KEDA HTTP Add-on) provides routing-only configuration +without creating a conflicting second ScaledObject. ``` Scale-to-Zero (Idle Detection) - 1. No active sessions/queries for cooldownPeriod seconds + 1. No open sessions/queries for cooldownPeriod seconds → KEDA detects all triggers inactive → scales HS2 to 0 (idleReplicaCount) @@ -627,7 +633,7 @@ scales down to zero pods when completely idle: Wake-from-Zero (with KEDA HTTP Add-on) 1. Beeline connects → KEDA HTTP interceptor proxy queues the - request and triggers HS2 scale-up (0 → 1) + request and triggers HS2 scale-up via external-push trigger 2. HS2 pod starts, reports hs2_open_sessions > 0 to Prometheus @@ -639,10 +645,19 @@ scales down to zero pods when completely idle: ``` +The HS2 ScaledObject combines three trigger types in a single resource: +- **Prometheus trigger** (`hs2_open_sessions`) — session-aware scaling +- **CPU trigger** — load-based scaling when resources are configured +- **external-push trigger** — wake-from-zero via the KEDA HTTP Add-on interceptor + +The `InterceptorRoute` CRD (`http.keda.sh/v1beta1`) configures only the interceptor +routing (host matching, backend target) without auto-creating a ScaledObject — this +avoids the dual-HPA conflict that `HTTPScaledObject` would cause. + > **Important:** Automatic wake-from-zero requires the KEDA HTTP Add-on. Traffic > must flow through the interceptor proxy (via Ingress or port-forward). Without the > HTTP Add-on, HS2 must be manually woken (`kubectl scale deployment/hive-hiveserver2 --replicas=1`). 
-> LLAP and TezAM wake automatically once HS2 reports active sessions. See +> LLAP and TezAM wake automatically once HS2 reports open sessions. See > [Connect to HiveServer2 > Connecting with Scale-to-Zero](#connecting-with-scale-to-zero-minreplicas--0) > for setup instructions. @@ -650,7 +665,7 @@ scales down to zero pods when completely idle: | Component | minReplicas | Scale-to-Zero Trigger | Wake Trigger | |-----------|-------------|----------------------|--------------| -| **HS2** | 0 | `hs2_active_sessions = 0` for cooldown | HTTP request via KEDA interceptor (or manual) | +| **HS2** | 0 | `hs2_open_sessions = 0` for cooldown | HTTP request via KEDA interceptor (`external-push`) | | **HMS** | 1 | Never (always running) | N/A | | **LLAP** | 0 | `hs2_open_sessions = 0` for cooldown | `hs2_open_sessions > 0` (cross-component) | | **TezAM** | 0 | `hs2_open_sessions = 0` + no pending tasks | `hs2_open_sessions > 0` (cross-component) | @@ -659,6 +674,9 @@ scales down to zero pods when completely idle: **CLI (with Ozone storage backend):** +Each component has sensible per-component defaults (see [Configuration Reference](#configuration-reference)). 
+Only `enabled=true` is needed to turn on autoscaling: + ```bash helm install hive ./helm/hive-operator \ --set cluster.database.type=postgres \ @@ -679,29 +697,15 @@ helm install hive ./helm/hive-operator \ --set 'cluster.storage.envVars[2].name=AWS_SECRET_ACCESS_KEY' \ --set 'cluster.storage.envVars[2].value=ozone' \ --set cluster.hiveServer2.autoscaling.enabled=true \ - --set cluster.hiveServer2.autoscaling.minReplicas=0 \ - --set cluster.hiveServer2.autoscaling.scaleUpThreshold=80 \ - --set cluster.hiveServer2.autoscaling.cooldownSeconds=600 \ - --set cluster.hiveServer2.autoscaling.gracePeriodSeconds=300 \ --set cluster.metastore.autoscaling.enabled=true \ - --set cluster.metastore.autoscaling.minReplicas=1 \ - --set cluster.metastore.autoscaling.cooldownSeconds=300 \ - --set cluster.metastore.autoscaling.gracePeriodSeconds=60 \ --set cluster.llap.autoscaling.enabled=true \ - --set cluster.llap.autoscaling.minReplicas=0 \ - --set cluster.llap.autoscaling.cooldownSeconds=900 \ - --set cluster.llap.autoscaling.gracePeriodSeconds=600 \ - --set cluster.tezAm.autoscaling.enabled=true \ - --set cluster.tezAm.autoscaling.minReplicas=0 \ - --set cluster.tezAm.autoscaling.scaleUpThreshold=5 \ - --set cluster.tezAm.autoscaling.cooldownSeconds=600 \ - --set cluster.tezAm.autoscaling.gracePeriodSeconds=120 + --set cluster.tezAm.autoscaling.enabled=true ``` -**Values file:** +**Values file (for customizing beyond defaults):** ```yaml -# values-autoscaling.yaml +# values-autoscaling.yaml — only override what you need cluster: database: type: postgres @@ -730,48 +734,38 @@ cluster: value: "ozone" hiveServer2: - replicas: 10 # Acts as max replicas when autoscaling is enabled - resources: - requestsCpu: "1" # Required for CPU-based autoscaling trigger - requestsMemory: "2Gi" + replicas: 10 # Acts as maxReplicas when autoscaling is enabled autoscaling: enabled: true - minReplicas: 0 # Scale to zero when idle - scaleUpThreshold: 80 # Requests/sec that triggers additional pods - 
cooldownSeconds: 600 # 10 min before scaling back to 0 - gracePeriodSeconds: 300 + # minReplicas: 0 # default — scale to zero when idle (requires KEDA HTTP Add-on) + # scaleUpThreshold: 80 # default — avg open sessions per pod triggering scale-up + # cooldownSeconds: 300 # default — 5 min before scaling down metastore: - replicas: 6 # Acts as max replicas when autoscaling is enabled - resources: - requestsCpu: "500m" # Required for CPU-based autoscaling trigger - requestsMemory: "1Gi" + replicas: 6 # Acts as maxReplicas when autoscaling is enabled autoscaling: enabled: true - minReplicas: 1 # HMS must always be available - cooldownSeconds: 300 - gracePeriodSeconds: 60 + # minReplicas: 0 # default — scale to zero when no connections + # scaleUpThreshold: 75 # default — total open connections triggering scale-up + # cooldownSeconds: 300 # default — 5 min cooldown + # gracePeriodSeconds: 60 # default — fast drain (HMS is stateless) llap: - replicas: 8 # Acts as max replicas when autoscaling is enabled + replicas: 8 # Acts as maxReplicas when autoscaling is enabled autoscaling: enabled: true - minReplicas: 0 # Scale to zero when no queries need LLAP - cooldownSeconds: 900 # 15 min — scaling down destroys in-memory cache - gracePeriodSeconds: 600 + # minReplicas: 0 # default — scale to zero when no HS2 sessions + # scaleUpThreshold: 1 # default — total busy slots (queued+running) triggering scale-up + # cooldownSeconds: 900 # default — 15 min (scaling down destroys in-memory cache) + # gracePeriodSeconds: 600 # default — 10 min drain for in-flight fragments tezAm: - replicas: 10 # Acts as max replicas when autoscaling is enabled - resources: - requestsCpu: "500m" # Required for CPU-based autoscaling trigger - requestsMemory: "1Gi" + replicas: 10 # Acts as maxReplicas when autoscaling is enabled autoscaling: enabled: true - minReplicas: 0 # Scale to zero when no queries running - scaleUpThreshold: 60 # CPU% when resources set; pending tasks per AM otherwise - 
scaleDownThreshold: 10 - cooldownSeconds: 600 - gracePeriodSeconds: 120 + # minReplicas: 0 # default — scale to zero when no HS2 sessions + # scaleUpThreshold: 5 # default — CPU% (with resources) or pending tasks (without) + # gracePeriodSeconds: 120 # default — 2 min drain for DAG completion ``` ```bash @@ -790,12 +784,12 @@ When autoscaling is enabled, the operator automatically: **Exported Prometheus Metrics (per component):** -| Component | Metrics | Purpose | +| Component | Key Metrics | Purpose | |-----------|---------|---------| -| **HiveServer2** | `hs2_open_sessions`, `hs2_active_sessions`, `hs2_active_calls_*`, `tez_session_pending_tasks`, `tez_session_running_tasks`, `tez_session_task_backlog_ratio` | Session/query load, Tez AM demand | -| **Metastore** | `api_*_total`, `hive_metastore_open_connections` | API call rates, connection count | -| **LLAP** | `hadoop_llapdaemon_executornumqueuedrequests`, `hadoop_llapdaemon_*` | Executor queue depth, daemon health | -| **Tez AM** | `tez_am_*` | DAG execution metrics | +| **HiveServer2** | `hs2_open_sessions`, `hs2_active_sessions`, `tez_session_pending_tasks` | Session count (scaling trigger), Tez AM demand | +| **Metastore** | `hive_metastore_open_connections`, `api_*_total` | Connection count (scaling trigger), API call rates | +| **LLAP** | `hadoop_llapdaemon_executornumqueuedrequests`, `hadoop_llapdaemon_executornumexecutorsconfigured`, `hadoop_llapdaemon_executornumexecutorsavailable` | Total busy slots = queued + configured - available (scaling trigger) | +| **Tez AM** | Standard K8s CPU metrics or `tez_session_pending_tasks` (from HS2) | CPU utilization or pending task count (scaling trigger) | ### CPU-Based Scaling and Resource Requests @@ -804,6 +798,18 @@ and Tez AM. KEDA's CPU trigger uses the `Utilization` metric type, which is defi percentage of the container's CPU request. This means **the container must have a CPU request defined** for the trigger to work. 
+**How it works:** + +- The CPU trigger scales up when pod CPU utilization exceeds `scaleUpThreshold`% of the CPU request +- The `scaleDownThreshold` configures the **activation threshold** — below this CPU%, the + trigger is completely inactive (doesn't participate in scaling decisions) +- Both the CPU trigger and the Prometheus-based trigger (sessions/connections) are evaluated + independently — if **either** exceeds its threshold, the component scales up (OR logic) +- Scale-down only happens when **both** triggers agree load is low (all below threshold) + +This means a long-running CPU-intensive query will keep the pod scaled even if there's +only one session open. Conversely, many idle sessions will keep it scaled even at low CPU. + If you enable autoscaling without setting `resources` for that component, the operator will omit the CPU trigger and rely solely on the Prometheus-based trigger. For Tez AM specifically, without CPU resources the operator uses `tez_session_pending_tasks` (queued @@ -820,6 +826,7 @@ cluster: requestsMemory: "2Gi" autoscaling: enabled: true + scaleDownThreshold: 30 # CPU trigger inactive below 30% (default) metastore: resources: @@ -834,9 +841,17 @@ cluster: requestsMemory: "1Gi" autoscaling: enabled: true + scaleUpThreshold: 60 # For TezAM with resources, this IS the CPU target % + scaleDownThreshold: 10 # CPU trigger inactive below 10% ``` -> **Note:** LLAP scaling uses only Prometheus triggers (`NumQueuedRequests`) +| Setting | Effect on CPU trigger | +|---------|----------------------| +| `resources.requestsCpu` | **Enables** the CPU trigger (required) | +| `scaleUpThreshold` | CPU target % — scales up when utilization exceeds this (default 80) | +| `scaleDownThreshold` | Activation value — CPU trigger ignored below this % (default 30) | + +> **Note:** LLAP scaling uses only Prometheus triggers (total busy slots) > and does not include a CPU trigger, so LLAP does not require `resources` to > be set for autoscaling to work. 
@@ -885,6 +900,21 @@ Traffic flow: Client → KEDA HTTP Interceptor → (if 0 pods: scale up, wait) → HS2 Service → HS2 Pod ``` +**Via kubectl exec (no local Hive install needed):** + +The Metastore pod is always running (`minReplicas=1`) and has beeline pre-installed. +Connecting through the interceptor wakes HS2 from zero automatically: + +```bash +kubectl exec -it deploy/hive-metastore -- beeline -u "jdbc:hive2://keda-add-ons-http-interceptor-proxy.keda.svc:8080/;transportMode=http;httpPath=cliservice" +``` + +Or connect directly when HS2 is already running: + +```bash +kubectl exec -it deploy/hive-metastore -- beeline -u "jdbc:hive2://hive-hiveserver2:10001/;transportMode=http;httpPath=cliservice" +``` + **Via port-forward (local development):** ```bash @@ -895,44 +925,30 @@ kubectl port-forward -n keda svc/keda-add-ons-http-interceptor-proxy 8080:8080 beeline -u "jdbc:hive2://localhost:8080/;transportMode=http;httpPath=cliservice" ``` -**Via Ingress (production):** +**Via Ingress:** -Create an Ingress that routes your domain to the KEDA interceptor. The key is the -`upstream-vhost` annotation which rewrites the Host header to the internal service -name so the interceptor can match it — no extra operator configuration needed: +Create an Ingress that routes to the KEDA interceptor. 
This example uses [nip.io](https://nip.io) +wildcard DNS so no `/etc/hosts` editing is needed — `hive.127.0.0.1.nip.io` resolves +to `127.0.0.1` automatically: ```bash -cat <<'EOF' | kubectl apply -f - -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: hive-interceptor - namespace: keda - annotations: - # Rewrite Host header to internal service name so KEDA interceptor can route it - nginx.ingress.kubernetes.io/upstream-vhost: "hive-hiveserver2.default.svc.cluster.local" -spec: - ingressClassName: nginx - rules: - - host: hive.example.com - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: keda-add-ons-http-interceptor-proxy - port: - number: 8080 -EOF +kubectl create ingress hive-interceptor -n keda --class=nginx \ + --rule="hive.127.0.0.1.nip.io/*=keda-add-ons-http-interceptor-proxy:8080" \ + --annotation="nginx.ingress.kubernetes.io/upstream-vhost=hive-hiveserver2.default.svc.cluster.local" ``` +> The `upstream-vhost` annotation rewrites the Host header to the internal service +> name so the KEDA interceptor can match and route the request. + Connect via beeline using the Ingress: ```bash -beeline -u "jdbc:hive2://hive.example.com:80/;transportMode=http;httpPath=cliservice" +beeline -u "jdbc:hive2://hive.127.0.0.1.nip.io:80/;transportMode=http;httpPath=cliservice" ``` +> For production, replace `hive.127.0.0.1.nip.io` with your actual domain +> (e.g., `hive.example.com`) and ensure DNS points to your ingress controller. 
+ **Manual wake (fallback without HTTP Add-on):** ```bash @@ -1055,12 +1071,11 @@ kubectl exec -it deployment/hive-hiveserver2 -- beeline -u "jdbc:hive2://hive-hi | Value | Default | Description | |-------|---------|-------------| | `cluster.<component>.autoscaling.enabled` | `false` | Enable KEDA-based autoscaling for this component | -| `cluster.<component>.autoscaling.minReplicas` | `2` | Floor replica count during scale-down | -| `cluster.<component>.autoscaling.scaleUpThreshold` | `60-80` | Metric threshold triggering scale-up (CPU% for HS2/HMS/TezAM with resources; pending tasks per AM for TezAM without resources; queue depth for LLAP) | -| `cluster.<component>.autoscaling.scaleDownThreshold` | `10-30` | Metric percentage threshold triggering scale-down | -| `cluster.<component>.autoscaling.cooldownSeconds` | `300-900` | Minimum seconds between scaling events | -| `cluster.<component>.autoscaling.gracePeriodSeconds` | `60-600` | Max time (seconds) to wait for graceful drain | -| `cluster.hiveServer2.autoscaling.scaleToZeroHosts` | `[]` | Hostnames for KEDA HTTP interceptor routing (Ingress domain) | +| `cluster.<component>.autoscaling.minReplicas` | `0` | Floor replica count. 0 enables scale-to-zero (HS2 requires KEDA HTTP Add-on) | +| `cluster.<component>.autoscaling.scaleUpThreshold` | `80` | Metric threshold triggering scale-up (sessions for HS2, connections for HMS, busy slots for LLAP, pending tasks or CPU% for TezAM) | +| `cluster.<component>.autoscaling.scaleDownThreshold` | `30` | CPU activation threshold below which the CPU trigger is inactive | +| `cluster.<component>.autoscaling.cooldownSeconds` | `300` | Seconds to wait after last scale event before scaling down again | +| `cluster.<component>.autoscaling.gracePeriodSeconds` | `60-600` | Max time (seconds) to wait for graceful drain before forced termination | --- @@ -1101,35 +1116,21 @@ helm install hive ./helm/hive-operator -f my-values.yaml ### Remove Everything (including dependencies) ```bash -# 1. 
Uninstall Hive operator (removes ScaledObjects, pods, services via owner references) -helm uninstall hive -kubectl delete crd hiveclusters.hive.apache.org --ignore-not-found - -# 2. Remove HS2 Ingress (if configured for scale-to-zero wake) -kubectl delete ingress hive-hs2-ingress --ignore-not-found - -# 3. Uninstall autoscaling infrastructure (KEDA, HTTP Add-on, Prometheus) +kubectl delete hivecluster --all -A --wait=false --ignore-not-found +kubectl delete ingress hive-interceptor -n keda --ignore-not-found +helm uninstall hive --ignore-not-found +kubectl delete crd hiveclusters.hive.apache.org --wait=false --ignore-not-found +kubectl delete crd --wait=false --ignore-not-found scaledobjects.keda.sh scaledjobs.keda.sh triggerauthentications.keda.sh clustertriggerauthentications.keda.sh httpscaledobjects.http.keda.sh interceptorroutes.http.keda.sh helm uninstall http-add-on -n keda --ignore-not-found helm uninstall keda -n keda --ignore-not-found helm uninstall prometheus -n monitoring --ignore-not-found - -# 4. Remove KEDA CRDs (not removed by helm uninstall) -kubectl delete crd --ignore-not-found \ - scaledobjects.keda.sh \ - scaledjobs.keda.sh \ - triggerauthentications.keda.sh \ - clustertriggerauthentications.keda.sh \ - httpscaledobjects.http.keda.sh - -# 5. Uninstall storage and infrastructure dependencies -helm uninstall ozone postgres zookeeper --ignore-not-found - -# 6. 
Clean up PVCs, secrets, and namespaces -kubectl delete pvc data-zookeeper-0 --ignore-not-found -kubectl delete pvc data-postgres-postgresql-0 --ignore-not-found +helm uninstall ozone --ignore-not-found +helm uninstall postgres --ignore-not-found +helm uninstall zookeeper --ignore-not-found +kubectl delete pvc data-zookeeper-0 data-postgres-postgresql-0 --ignore-not-found kubectl delete secret hive-db-secret --ignore-not-found -kubectl delete namespace keda --ignore-not-found -kubectl delete namespace monitoring --ignore-not-found +kubectl delete namespace keda --wait=false --ignore-not-found +kubectl delete namespace monitoring --wait=false --ignore-not-found ``` --- diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml index 791c60e0d813..d3df4a5a7868 100644 --- a/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml +++ b/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml @@ -60,5 +60,5 @@ rules: verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] # KEDA HTTP Add-on for scale-to-zero (wake-from-zero on HTTP request) - apiGroups: ["http.keda.sh"] - resources: ["httpscaledobjects"] + resources: ["httpscaledobjects", "interceptorroutes"] verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java index 55bd3372a40d..d02f08fff038 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java @@ -19,7 +19,11 @@ package org.apache.hive.kubernetes.operator; import io.javaoperatorsdk.operator.Operator; +import 
io.javaoperatorsdk.operator.api.config.ControllerConfiguration; +import io.javaoperatorsdk.operator.api.config.ResolvedControllerConfiguration; +import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.reconciler.HiveClusterReconciler; +import org.apache.hive.kubernetes.operator.reconciler.HiveWorkflowSpec; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,7 +40,16 @@ private HiveOperatorMain() { public static void main(String[] args) { LOG.info("Starting Hive Kubernetes Operator"); Operator operator = new Operator(); - operator.register(new HiveClusterReconciler()); + HiveClusterReconciler reconciler = new HiveClusterReconciler(); + // Get the annotation-derived base config, then inject our programmatic workflow spec. + ControllerConfiguration baseConfig = + operator.getConfigurationService().getConfigurationFor(reconciler); + HiveWorkflowSpec workflowSpec = new HiveWorkflowSpec(); + ((ResolvedControllerConfiguration) baseConfig) + .setWorkflowSpec(workflowSpec); + LOG.info("Registered workflow with {} dependent resource specs", + workflowSpec.getDependentResourceSpecs().size()); + operator.register(reconciler, baseConfig); operator.start(); LOG.info("Hive Kubernetes Operator started successfully"); } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java deleted file mode 100644 index 6c0f9308dbc1..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import java.util.Map; - -import io.fabric8.kubernetes.api.model.ConfigMap; -import io.fabric8.kubernetes.api.model.ConfigMapBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; -import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** Manages the Hadoop core-site.xml ConfigMap for filesystem configuration. 
*/ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=hadoop-config," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class HadoopConfigMapDependent - extends HiveDependentResource { - - public static final String COMPONENT = "hadoop-config"; - - public HadoopConfigMapDependent() { - super(ConfigMap.class); - } - - @Override - protected ConfigMap desired(HiveCluster hiveCluster, - Context context) { - Map props = - HiveConfigBuilder.getHadoopCoreSite(hiveCluster.getSpec()); - - return new ConfigMapBuilder() - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) - .endMetadata() - .addToData("core-site.xml", HadoopXmlBuilder.buildXml(props)) - .build(); - } - - /** Returns the ConfigMap resource name for this HiveCluster. */ - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-hadoop-config"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveConfigMapDependent.java new file mode 100644 index 000000000000..935b47e094cb --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveConfigMapDependent.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import io.fabric8.kubernetes.api.model.ConfigMap; +import io.fabric8.kubernetes.api.model.ConfigMapBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; + +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; +import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Unified ConfigMap dependent resource for all Hive component configurations. + * Subclassed per component to define the specific XML data and label selector. 
+ */ +public abstract class HiveConfigMapDependent extends HiveDependentResource { + + private final String component; + private final String suffix; + + protected HiveConfigMapDependent(String component, String suffix) { + super(ConfigMap.class); + this.component = component; + this.suffix = suffix; + } + + @Override + protected String getSecondaryResourceName(HiveCluster primary, Context context) { + return primary.getMetadata().getName() + "-" + suffix; + } + + @Override + protected ConfigMap desired(HiveCluster hiveCluster, Context context) { + ConfigMapBuilder builder = + new ConfigMapBuilder().withNewMetadata().withName(hiveCluster.getMetadata().getName() + "-" + suffix) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, component)).endMetadata(); + addData(builder, hiveCluster); + return builder.build(); + } + + /** + * Subclasses add their specific XML data entries. + */ + protected abstract void addData(ConfigMapBuilder builder, HiveCluster hiveCluster); + + /** + * Hadoop core-site.xml ConfigMap for filesystem configuration. + */ + @KubernetesDependent(informer = @Informer(labelSelector = "app.kubernetes.io/component=hadoop-config," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator")) + public static class Hadoop extends HiveConfigMapDependent { + public Hadoop() { + super("hadoop-config", "hadoop-config"); + } + + @Override + protected void addData(ConfigMapBuilder builder, HiveCluster hiveCluster) { + builder.addToData("core-site.xml", + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(hiveCluster.getSpec()))); + } + + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-hadoop-config"; + } + } + + /** + * Metastore metastore-site.xml ConfigMap. 
+ */ + @KubernetesDependent(informer = @Informer(labelSelector = "app.kubernetes.io/component=metastore," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator")) + public static class Metastore extends HiveConfigMapDependent { + public Metastore() { + super("metastore", "metastore-config"); + } + + @Override + protected void addData(ConfigMapBuilder builder, HiveCluster hiveCluster) { + builder.addToData("metastore-site.xml", + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getMetastoreSite(hiveCluster.getSpec()))); + } + + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-metastore-config"; + } + } + + /** + * HiveServer2 hive-site.xml + tez-site.xml ConfigMap. + */ + @KubernetesDependent(informer = @Informer(labelSelector = "app.kubernetes.io/component=hiveserver2," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator")) + public static class HiveServer2 extends HiveConfigMapDependent { + public HiveServer2() { + super("hiveserver2", "hiveserver2-config"); + } + + @Override + protected void addData(ConfigMapBuilder builder, HiveCluster hiveCluster) { + HiveClusterSpec spec = hiveCluster.getSpec(); + builder.addToData("hive-site.xml", + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHiveServer2HiveSite(hiveCluster, spec))); + builder.addToData("tez-site.xml", HadoopXmlBuilder.buildXml(HiveConfigBuilder.getTezSite(spec))); + } + + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-hiveserver2-config"; + } + } + + /** + * LLAP llap-daemon-site.xml ConfigMap. 
+ */ + @KubernetesDependent(informer = @Informer(labelSelector = "app.kubernetes.io/component=llap," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator")) + public static class Llap extends HiveConfigMapDependent { + public Llap() { + super("llap", "llap-config"); + } + + @Override + protected void addData(ConfigMapBuilder builder, HiveCluster hiveCluster) { + builder.addToData("llap-daemon-site.xml", + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getLlapDaemonSite(hiveCluster.getSpec()))); + } + + public static String resourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + "-llap-config"; + } + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java index 9b1cb75d6553..7fc1290d5375 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import io.fabric8.kubernetes.api.model.AffinityBuilder; import io.fabric8.kubernetes.api.model.Container; import io.fabric8.kubernetes.api.model.ContainerBuilder; @@ -80,32 +81,41 @@ protected HiveDependentResource(Class resourceType) { super(resourceType); } + /** - * Catches 409 AlreadyExists during resource creation caused by - * informer lag — the resource exists on the API server but - * the informer cache hasn't indexed it yet, so JOSDK calls - * create directly. + * Returns the expected Kubernetes resource name for this dependent. + * Used to disambiguate when multiple dependents share the same resource + * type (e.g., multiple ConfigMap or Service dependents). Subclasses that + * share a resource type MUST override this method. 
+ * + * @throws IllegalStateException if not overridden and disambiguation is needed */ - @Override - protected R handleCreate(R desired, P primary, Context

context) { - try { - return super.handleCreate(desired, primary, context); - } catch (KubernetesClientException e) { - if (e.getCode() == 409) { - LOG.info("Resource {} already exists (informer lag), " - + "will reconcile on next event", - desired.getMetadata().getName()); - return desired; - } - throw e; - } + protected String getSecondaryResourceName(P primary, Context

context) { + throw new IllegalStateException( + getClass().getSimpleName() + " must override getSecondaryResourceName() " + + "when multiple dependents share the same resource type"); } @Override public Optional getSecondaryResource(P primary, Context

context) { return eventSource() - .flatMap(es -> es.getSecondaryResource(primary)); + .flatMap(es -> { + Set resources = es.getSecondaryResources(primary); + if (resources.isEmpty()) { + return Optional.empty(); + } + // Always filter by expected name — even when only one resource + // is in the cache. Without this, a single Deployment (e.g. + // metastore) would be handed to HiveServer2's matcher, causing + // a cross-component update loop. + String expectedName = getSecondaryResourceName(primary, + context); + return resources.stream() + .filter(r -> expectedName.equals( + r.getMetadata().getName())) + .findFirst(); + }); } /** @@ -127,6 +137,171 @@ public Matcher.Result match(R actualResource, R desired, return super.match(actualResource, desired, primary, context); } + /** + * Handles 409 Conflict errors during resource creation caused by informer + * cache lag. When the operator creates a resource but the informer hasn't + * yet received the creation event, the framework may attempt to create it + * again. Kubernetes rejects the duplicate with 409 — this handler absorbs + * that expected race and lets the next reconciliation pick up the resource + * from the updated cache. + */ + @Override + protected R handleCreate(R desired, P primary, Context

context) { + try { + return super.handleCreate(desired, primary, context); + } catch (KubernetesClientException e) { + if (e.getCode() == 409) { + LOG.info("Resource {} already exists (informer lag), " + + "will reconcile on next event", + desired.getMetadata().getName()); + return desired; + } + throw e; + } + } + + /** + * Resolves the replica count to set in the desired workload spec. + * When autoscaling is enabled and the workload already exists, the current + * replica count is preserved (KEDA/HPA manages it). On initial creation + * the provided fallback is used. + * + * @param primary the HiveCluster primary resource + * @param context the reconciliation context + * @param autoscaling autoscaling spec for this component (may be null) + * @param staticReplicas replica count from the spec (used when autoscaling is off) + * @param initialReplicas replica count on first creation when autoscaling is on + */ + @SuppressWarnings("unchecked") + protected Integer resolveReplicaCount(P primary, Context

context, + AutoscalingSpec autoscaling, int staticReplicas, int initialReplicas) { + if (autoscaling == null || !autoscaling.isEnabled()) { + return staticReplicas; + } + return getSecondaryResource(primary, context) + .map(existing -> { + if (existing instanceof io.fabric8.kubernetes.api.model.apps.Deployment d) { + return d.getSpec().getReplicas(); + } else if (existing instanceof io.fabric8.kubernetes.api.model.apps.StatefulSet s) { + return s.getSpec().getReplicas(); + } + return initialReplicas; + }) + .orElse(initialReplicas); + } + + /** + * Builds a preStop drain script that polls a single Prometheus metric + * (from the JMX Exporter at localhost:9404/metrics) until the value + * reaches zero, then exits to allow graceful pod termination. + * + * @param startupMessage logged at the start (e.g. "Waiting for open connections to drain") + * @param metricName Prometheus metric name (used in grep and log messages) + * @param varName shell variable name for the extracted value (e.g. "CONNS") + * @param idleMessage logged when idle condition is met (e.g. "All connections drained. Shutting down.") + * @param sleepSeconds polling interval in seconds + * @param maxRetries max consecutive curl failures before giving up + * @param prefixCommands optional commands to run before the polling loop (may be null) + */ + protected static String buildDrainScript( + String startupMessage, String metricName, String varName, + String idleMessage, int sleepSeconds, int maxRetries, + List prefixCommands) { + List lines = new ArrayList<>(); + lines.add("#!/bin/bash"); + if (prefixCommands != null) { + lines.addAll(prefixCommands); + } + lines.add("echo '[preStop] " + startupMessage + + " (polling localhost:9404/metrics)...'"); + lines.add("RETRIES=0"); + lines.add("while true; do"); + lines.add(" RESPONSE=$(curl -sf http://localhost:9404/metrics)"); + lines.add(" if [ $? 
-ne 0 ]; then"); + lines.add(" RETRIES=$((RETRIES+1))"); + lines.add(" echo \"[preStop] ERROR: JMX Exporter unreachable on port 9404 (attempt $RETRIES)\""); + lines.add(" if [ $RETRIES -ge " + maxRetries + " ]; then"); + lines.add(" echo '[preStop] JMX Exporter not responding after " + + (maxRetries * sleepSeconds) + "s. Proceeding with shutdown.'"); + lines.add(" break"); + lines.add(" fi"); + lines.add(" sleep " + sleepSeconds + "; continue"); + lines.add(" fi"); + lines.add(" " + varName + "=$(echo \"$RESPONSE\" | grep '^" + + metricName + " ' | awk '{print $2}')"); + lines.add(" if [ -z \"$" + varName + "\" ]; then"); + lines.add(" echo '[preStop] WARNING: " + metricName + + " metric not found. JMX Exporter may not be configured.'"); + lines.add(" break"); + lines.add(" fi"); + lines.add(" if [ \"${" + varName + "%.*}\" -le 0 ] 2>/dev/null; then"); + lines.add(" echo '[preStop] " + idleMessage + "'"); + lines.add(" break"); + lines.add(" fi"); + lines.add(" echo \"[preStop] " + metricName + "=$" + varName + " - waiting...\""); + lines.add(" RETRIES=0"); + lines.add(" sleep " + sleepSeconds); + lines.add("done"); + return String.join("\n", lines); + } + + /** + * Builds a preStop drain script that polls two Prometheus metrics and + * waits until available >= total (all executors idle). Used by LLAP. + * + * @param startupMessage logged at the start + * @param metricGrepA grep pattern for the first metric (e.g. includes trailing '{') + * @param varNameA shell variable for the first metric value (e.g. "AVAILABLE") + * @param metricGrepB grep pattern for the second metric + * @param varNameB shell variable for the second metric value (e.g. 
"TOTAL") + * @param notFoundWarning warning message when metrics are not found + * @param idleMessage logged when idle condition is met + * @param waitingFormat format for waiting log (with shell variable references) + * @param sleepSeconds polling interval in seconds + * @param maxRetries max consecutive curl failures before giving up + */ + protected static String buildDualMetricDrainScript( + String startupMessage, + String metricGrepA, String varNameA, + String metricGrepB, String varNameB, + String notFoundWarning, String idleMessage, + String waitingFormat, int sleepSeconds, int maxRetries) { + List lines = new ArrayList<>(); + lines.add("#!/bin/bash"); + lines.add("echo '[preStop] " + startupMessage + + " (polling localhost:9404/metrics)...'"); + lines.add("RETRIES=0"); + lines.add("while true; do"); + lines.add(" RESPONSE=$(curl -sf http://localhost:9404/metrics)"); + lines.add(" if [ $? -ne 0 ]; then"); + lines.add(" RETRIES=$((RETRIES+1))"); + lines.add(" echo \"[preStop] ERROR: JMX Exporter unreachable on port 9404 (attempt $RETRIES)\""); + lines.add(" if [ $RETRIES -ge " + maxRetries + " ]; then"); + lines.add(" echo '[preStop] JMX Exporter not responding after " + + (maxRetries * sleepSeconds) + "s. 
Proceeding with shutdown.'"); + lines.add(" break"); + lines.add(" fi"); + lines.add(" sleep " + sleepSeconds + "; continue"); + lines.add(" fi"); + lines.add(" " + varNameA + "=$(echo \"$RESPONSE\" | grep '^" + + metricGrepA + "' | awk '{print $2}')"); + lines.add(" " + varNameB + "=$(echo \"$RESPONSE\" | grep '^" + + metricGrepB + "' | awk '{print $2}')"); + lines.add(" if [ -z \"$" + varNameA + "\" ] || [ -z \"$" + varNameB + "\" ]; then"); + lines.add(" echo '[preStop] WARNING: " + notFoundWarning + "'"); + lines.add(" break"); + lines.add(" fi"); + lines.add(" if [ \"${" + varNameA + "%.*}\" -ge \"${" + varNameB + "%.*}\" ] 2>/dev/null; then"); + lines.add(" echo '[preStop] " + idleMessage + "'"); + lines.add(" break"); + lines.add(" fi"); + lines.add(" echo \"[preStop] " + waitingFormat + "\""); + lines.add(" RETRIES=0"); + lines.add(" sleep " + sleepSeconds); + lines.add("done"); + return String.join("\n", lines); + } + /** * Computes a SHA-256 hash of the given input strings. * Used to annotate pod templates so that config changes trigger rolling updates. @@ -237,8 +412,8 @@ protected static void buildMetastoreVolumes( .withMountPath(CONF_MOUNT_PATH).build()); volumes.add(buildProjectedConfigVolume("hive-config", - MetastoreConfigMapDependent.resourceName(hiveCluster), - HadoopConfigMapDependent.resourceName(hiveCluster))); + HiveConfigMapDependent.Metastore.resourceName(hiveCluster), + HiveConfigMapDependent.Hadoop.resourceName(hiveCluster))); } /** Builds Kubernetes ResourceRequirements from the operator's spec. */ @@ -424,6 +599,65 @@ protected static Probe buildTcpProbe(int port, ProbeSpec spec, int defaultInitia return builder.build(); } + /** + * Applies the autoscaling lifecycle to a workload's pod template: sets a preStop + * exec lifecycle hook, terminationGracePeriodSeconds, and Prometheus scrape annotations. 
+ * + * @param podSpec the pod spec of the workload (Deployment or StatefulSet) + * @param podMetadata the pod template metadata (for annotations) + * @param preStopScript the shell script to run in the preStop hook + * @param gracePeriodSeconds termination grace period + */ + protected static void applyAutoscalingLifecycle( + io.fabric8.kubernetes.api.model.PodSpec podSpec, + io.fabric8.kubernetes.api.model.ObjectMeta podMetadata, + String preStopScript, int gracePeriodSeconds) { + io.fabric8.kubernetes.api.model.Lifecycle lifecycle = + new io.fabric8.kubernetes.api.model.LifecycleBuilder() + .withNewPreStop() + .withNewExec() + .withCommand("/bin/bash", "-c", preStopScript) + .endExec() + .endPreStop() + .build(); + podSpec.getContainers().get(0).setLifecycle(lifecycle); + podSpec.setTerminationGracePeriodSeconds((long) gracePeriodSeconds); + podMetadata.getAnnotations().put("prometheus.io/scrape", "true"); + podMetadata.getAnnotations().put("prometheus.io/port", + String.valueOf(ConfigUtils.PROMETHEUS_JMX_EXPORTER_PORT)); + podMetadata.getAnnotations().put("prometheus.io/path", "/metrics"); + } + + /** + * Appends user-provided volumes and volume mounts to a workload's pod template. + * Handles both global (spec-level) and component-specific extras. 
+ * + * @param podSpec the pod spec + * @param globalVolumes spec.volumes() (may be null) + * @param globalVolumeMounts spec.volumeMounts() (may be null) + * @param extraVolumes component-specific extraVolumes (may be null) + * @param extraVolumeMounts component-specific extraVolumeMounts (may be null) + */ + protected static void appendUserVolumes( + io.fabric8.kubernetes.api.model.PodSpec podSpec, + List globalVolumes, + List globalVolumeMounts, + List extraVolumes, + List extraVolumeMounts) { + if (globalVolumes != null) { + podSpec.getVolumes().addAll(globalVolumes); + } + if (globalVolumeMounts != null) { + podSpec.getContainers().get(0).getVolumeMounts().addAll(globalVolumeMounts); + } + if (extraVolumes != null) { + podSpec.getVolumes().addAll(extraVolumes); + } + if (extraVolumeMounts != null) { + podSpec.getContainers().get(0).getVolumeMounts().addAll(extraVolumeMounts); + } + } + /** Path where the JMX Exporter agent JAR is stored inside the pod. */ protected static final String JMX_EXPORTER_DIR = "/opt/jmx-exporter"; protected static final String JMX_EXPORTER_JAR = JMX_EXPORTER_DIR + "/jmx_prometheus_javaagent.jar"; @@ -485,7 +719,8 @@ protected static void addJmxExporter( // Expose the metrics port ports.add(new io.fabric8.kubernetes.api.model.ContainerPortBuilder() .withName("metrics") - .withContainerPort(ConfigUtils.PROMETHEUS_JMX_EXPORTER_PORT).build()); + .withContainerPort(ConfigUtils.PROMETHEUS_JMX_EXPORTER_PORT) + .withProtocol("TCP").build()); // Add javaagent flag to the appropriate JVM opts env var. // LLAP uses LLAP_DAEMON_OPTS (its startup script ignores SERVICE_OPTS). 
@@ -536,7 +771,7 @@ private static String buildJmxExporterConfig(String component) { sb.append("- pattern: 'metrics<>Count'\n"); sb.append(" name: api_$1_total\n"); sb.append(" type: COUNTER\n"); - sb.append("- pattern: 'metrics<>Value'\n"); + sb.append("- pattern: 'metrics<>Count'\n"); sb.append(" name: hive_metastore_open_connections\n"); sb.append(" type: GAUGE\n"); break; diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveGenericDependentResource.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveGenericDependentResource.java index feff8775a6f4..6db1482f3bd1 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveGenericDependentResource.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveGenericDependentResource.java @@ -18,11 +18,16 @@ package org.apache.hive.kubernetes.operator.dependent; +import java.util.List; +import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.Set; import io.fabric8.kubernetes.api.model.GenericKubernetesResource; +import io.javaoperatorsdk.operator.api.config.informer.InformerEventSourceConfiguration; import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.reconciler.EventSourceContext; import io.javaoperatorsdk.operator.api.reconciler.dependent.GarbageCollected; import io.javaoperatorsdk.operator.processing.GroupVersionKind; import io.javaoperatorsdk.operator.processing.dependent.Creator; @@ -52,6 +57,22 @@ protected HiveGenericDependentResource(GroupVersionKind gvk) { super(gvk); } + /** + * Adds a generation-aware update filter so that KEDA/controller status + * patches (which don't increment metadata.generation) do not trigger + * unnecessary reconciliation loops. 
+ */ + @Override + protected InformerEventSourceConfiguration.Builder + informerConfigurationBuilder(EventSourceContext context) { + return super.informerConfigurationBuilder(context) + .withOnUpdateFilter((newResource, oldResource) -> { + Long newGen = newResource.getMetadata().getGeneration(); + Long oldGen = oldResource.getMetadata().getGeneration(); + return !Objects.equals(newGen, oldGen); + }); + } + /** * Returns the expected Kubernetes resource name for this dependent given the primary. * Used to discriminate between multiple secondary resources of the same GVK @@ -70,4 +91,71 @@ public Optional getSecondaryResource( .filter(r -> expectedName.equals(r.getMetadata().getName())) .findFirst(); } + + /** + * Builds the nested "advanced" HPA behavior configuration for a KEDA ScaledObject. + * + * @param scaleDownStabilization stabilizationWindowSeconds for scale-down + * @param scaleDownPolicyType policy type (e.g. "Pods", "Percent") + * @param scaleDownValue policy value + * @param scaleDownPeriod policy periodSeconds + * @param scaleUpStabilization stabilizationWindowSeconds for scale-up + * @param scaleUpPolicyType policy type (e.g. 
"Pods", "Percent") + * @param scaleUpValue policy value + * @param scaleUpPeriod policy periodSeconds + */ + protected static Map buildHpaBehavior( + int scaleDownStabilization, String scaleDownPolicyType, + int scaleDownValue, int scaleDownPeriod, + int scaleUpStabilization, String scaleUpPolicyType, + int scaleUpValue, int scaleUpPeriod) { + return Map.of( + "horizontalPodAutoscalerConfig", Map.of( + "behavior", Map.of( + "scaleDown", Map.of( + "stabilizationWindowSeconds", scaleDownStabilization, + "policies", List.of(Map.of( + "type", scaleDownPolicyType, + "value", scaleDownValue, + "periodSeconds", scaleDownPeriod + )) + ), + "scaleUp", Map.of( + "stabilizationWindowSeconds", scaleUpStabilization, + "policies", List.of(Map.of( + "type", scaleUpPolicyType, + "value", scaleUpValue, + "periodSeconds", scaleUpPeriod + )) + ) + ) + ) + ); + } + + /** + * Builds the HS2 cross-component activation trigger used by LLAP and TezAM. + * Uses {@code (max(hs2_open_sessions{...}) > bool 0) or vector(0)} so the + * result is always 0 or 1, preventing zombie sessions from driving proportional scaling. + * Threshold is set to maxReplicas so desired = ceil(1/max) = 1 (activation only). 
+ * + * @param namespace the Kubernetes namespace + * @param hs2TargetName the HS2 deployment name (for pod label matching) + * @param maxReplicas the max replicas of the component (used as threshold) + */ + protected static Map buildHs2ActivationTrigger( + String namespace, String hs2TargetName, int maxReplicas) { + return Map.of( + "type", "prometheus", + "metadata", Map.of( + "serverAddress", "http://prometheus-server.monitoring.svc.cluster.local", + "metricName", "hs2_open_sessions_activation", + "query", String.format( + "(max(hs2_open_sessions{namespace=\"%s\",pod=~\"%s-.*\"}) > bool 0) or vector(0)", + namespace, hs2TargetName), + "threshold", String.valueOf(maxReplicas), + "activationThreshold", "0" + ) + ); + } } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HivePdbDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HivePdbDependent.java new file mode 100644 index 000000000000..2942a5b674bf --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HivePdbDependent.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.dependent; + +import io.fabric8.kubernetes.api.model.IntOrString; +import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudget; +import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudgetBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Unified PodDisruptionBudget dependent resource for all Hive components. + * Ensures at least one pod remains available during voluntary disruptions + * (scale-down, node drain, rolling updates). + *

+ * Subclassed per component (HS2, Metastore, LLAP, TezAM) only to satisfy + * JOSDK's requirement for distinct no-arg-constructible classes in the workflow. + */ +public abstract class HivePdbDependent + extends HiveDependentResource { + + private final String component; + + protected HivePdbDependent(String component) { + super(PodDisruptionBudget.class); + this.component = component; + } + + @Override + protected String getSecondaryResourceName(HiveCluster primary, + Context context) { + return primary.getMetadata().getName() + "-" + component + "-pdb"; + } + + @Override + protected PodDisruptionBudget desired(HiveCluster hiveCluster, + Context context) { + return new PodDisruptionBudgetBuilder() + .withNewMetadata() + .withName(hiveCluster.getMetadata().getName() + "-" + component + "-pdb") + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, component)) + .endMetadata() + .withNewSpec() + .withMinAvailable(new IntOrString(1)) + .withNewSelector() + .withMatchLabels(Labels.selectorForComponent(hiveCluster, component)) + .endSelector() + .endSpec() + .build(); + } + + @KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=hiveserver2," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") + ) + public static class HiveServer2 extends HivePdbDependent { + public HiveServer2() { super("hiveserver2"); } + } + + @KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=metastore," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") + ) + public static class Metastore extends HivePdbDependent { + public Metastore() { super("metastore"); } + } + + @KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=llap," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") + ) + public static class Llap extends HivePdbDependent { + public Llap() { super("llap"); } + } + + @KubernetesDependent( + informer 
= @Informer(labelSelector = "app.kubernetes.io/component=tezam," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") + ) + public static class TezAm extends HivePdbDependent { + public TezAm() { super("tezam"); } + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveScaledObjectDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveScaledObjectDependent.java new file mode 100644 index 000000000000..f5efb1302bd3 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveScaledObjectDependent.java @@ -0,0 +1,360 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import io.fabric8.kubernetes.api.model.GenericKubernetesResource; +import io.fabric8.kubernetes.api.model.GenericKubernetesResourceBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.GroupVersionKind; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Unified KEDA ScaledObject dependent resource for metric-based autoscaling. + * Subclassed per component to define component-specific triggers, HPA behavior, + * and target workload kind. + *

+ * Note: When HS2 minReplicas is 0, the ScaledObject includes an external-push + * trigger from the KEDA HTTP Add-on (via InterceptorRoute) for wake-from-zero. + */ +public abstract class HiveScaledObjectDependent extends HiveGenericDependentResource { + + private final String component; + private final String targetKind; + + protected HiveScaledObjectDependent(String component, String targetKind) { + super(new GroupVersionKind("keda.sh", "v1alpha1", "ScaledObject")); + this.component = component; + this.targetKind = targetKind; + } + + @Override + protected GenericKubernetesResource desired(HiveCluster hiveCluster, + Context context) { + AutoscalingSpec autoscaling = getAutoscalingSpec(hiveCluster); + int maxReplicas = getMaxReplicas(hiveCluster); + String targetName = hiveCluster.getMetadata().getName() + "-" + component; + + Map spec = new HashMap<>(); + spec.put("scaleTargetRef", Map.of( + "apiVersion", "apps/v1", + "kind", targetKind, + "name", targetName + )); + int minReplicaCount = Math.max(1, autoscaling.minReplicas()); + spec.put("minReplicaCount", minReplicaCount); + spec.put("maxReplicaCount", maxReplicas); + if (autoscaling.minReplicas() == 0) { + spec.put("idleReplicaCount", 0); + } + spec.put("cooldownPeriod", autoscaling.cooldownSeconds()); + spec.put("pollingInterval", getPollingInterval()); + spec.put("advanced", getAdvanced(hiveCluster, autoscaling, maxReplicas)); + spec.put("triggers", getTriggers(hiveCluster, autoscaling, maxReplicas, targetName)); + + return new GenericKubernetesResourceBuilder() + .withApiVersion("keda.sh/v1alpha1") + .withKind("ScaledObject") + .withNewMetadata() + .withName(targetName + "-scaledobject") + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, component)) + .endMetadata() + .withAdditionalProperties(Map.of("spec", spec)) + .build(); + } + + @Override + protected String getResourceName(HiveCluster hiveCluster) { + return hiveCluster.getMetadata().getName() + 
"-" + component + "-scaledobject"; + } + + /** Returns the autoscaling spec for the component. */ + protected abstract AutoscalingSpec getAutoscalingSpec(HiveCluster hiveCluster); + + /** Returns max replicas (typically the static replicas count from spec). */ + protected abstract int getMaxReplicas(HiveCluster hiveCluster); + + /** Returns the KEDA polling interval in seconds. */ + protected abstract int getPollingInterval(); + + /** Returns the "advanced" section (HPA behavior configuration). */ + protected abstract Map getAdvanced( + HiveCluster hiveCluster, AutoscalingSpec autoscaling, int maxReplicas); + + /** Returns the list of KEDA triggers. */ + protected abstract List> getTriggers( + HiveCluster hiveCluster, AutoscalingSpec autoscaling, + int maxReplicas, String targetName); + + /** + * HiveServer2 ScaledObject: scales on hs2_active_sessions + CPU. + */ + public static class HiveServer2 extends HiveScaledObjectDependent { + public HiveServer2() { super("hiveserver2", "Deployment"); } + + @Override + protected AutoscalingSpec getAutoscalingSpec(HiveCluster hiveCluster) { + return hiveCluster.getSpec().hiveServer2().autoscaling(); + } + + @Override + protected int getMaxReplicas(HiveCluster hiveCluster) { + return hiveCluster.getSpec().hiveServer2().replicas(); + } + + @Override + protected int getPollingInterval() { return 30; } + + @Override + protected Map getAdvanced( + HiveCluster hiveCluster, AutoscalingSpec autoscaling, int maxReplicas) { + return buildHpaBehavior( + autoscaling.cooldownSeconds(), "Pods", 1, 60, + 60, "Percent", 100, 60); + } + + @Override + protected List> getTriggers( + HiveCluster hiveCluster, AutoscalingSpec autoscaling, + int maxReplicas, String targetName) { + List> triggers = new ArrayList<>(); + triggers.add(Map.of( + "type", "prometheus", + "metadata", Map.of( + "serverAddress", "http://prometheus-server.monitoring.svc.cluster.local", + "metricName", "hs2_open_sessions", + "query", String.format( + 
"avg(hs2_open_sessions{namespace=\"%s\",pod=~\"%s-.*\"}) or vector(0)", + hiveCluster.getMetadata().getNamespace(), targetName), + "threshold", String.valueOf(autoscaling.scaleUpThreshold()), + "activationThreshold", "0" + ) + )); + if (hiveCluster.getSpec().hiveServer2().resources() != null) { + triggers.add(Map.of( + "type", "cpu", + "metricType", "Utilization", + "metadata", Map.of( + "value", String.valueOf(autoscaling.scaleUpThreshold()), + "activationValue", String.valueOf(autoscaling.scaleDownThreshold()) + ) + )); + } + // When scale-to-zero is enabled, add KEDA HTTP Add-on external-push + // trigger to wake HS2 from 0 when requests arrive at the interceptor. + if (autoscaling.minReplicas() == 0) { + String routeName = HiveServer2InterceptorRouteDependent.resourceName(hiveCluster); + triggers.add(Map.of( + "type", "external-push", + "metadata", Map.of( + "scalerAddress", + "keda-add-ons-http-external-scaler.keda:9090", + "interceptorRoute", routeName + ) + )); + } + return triggers; + } + } + + /** + * Metastore ScaledObject: scales on open_connections + CPU. 
+ */ + public static class Metastore extends HiveScaledObjectDependent { + public Metastore() { super("metastore", "Deployment"); } + + @Override + protected AutoscalingSpec getAutoscalingSpec(HiveCluster hiveCluster) { + return hiveCluster.getSpec().metastore().autoscaling(); + } + + @Override + protected int getMaxReplicas(HiveCluster hiveCluster) { + return hiveCluster.getSpec().metastore().replicas(); + } + + @Override + protected int getPollingInterval() { return 30; } + + @Override + protected Map getAdvanced( + HiveCluster hiveCluster, AutoscalingSpec autoscaling, int maxReplicas) { + return buildHpaBehavior( + autoscaling.cooldownSeconds(), "Pods", 1, 60, + 120, "Percent", 50, 60); + } + + @Override + protected List> getTriggers( + HiveCluster hiveCluster, AutoscalingSpec autoscaling, + int maxReplicas, String targetName) { + List> triggers = new ArrayList<>(); + triggers.add(Map.of( + "type", "prometheus", + "metadata", Map.of( + "serverAddress", "http://prometheus-server.monitoring.svc.cluster.local", + "metricName", "hive_metastore_open_connections", + "query", String.format( + "sum(hive_metastore_open_connections{namespace=\"%s\",pod=~\"%s-.*\"}) or vector(0)", + hiveCluster.getMetadata().getNamespace(), targetName), + "threshold", String.valueOf(autoscaling.scaleUpThreshold()), + "activationThreshold", "0" + ) + )); + if (hiveCluster.getSpec().metastore().resources() != null) { + triggers.add(Map.of( + "type", "cpu", + "metricType", "Utilization", + "metadata", Map.of( + "value", String.valueOf(autoscaling.scaleUpThreshold()), + "activationValue", String.valueOf(autoscaling.scaleDownThreshold()) + ) + )); + } + return triggers; + } + } + + /** + * LLAP ScaledObject: scales on NumQueuedRequests + HS2 activation trigger. + * Scale-down is slow (preserves in-memory cache). 
+   */
+  public static class Llap extends HiveScaledObjectDependent {
+    /** Passes the component name and the scale-target kind to the base class. */
+    public Llap() {
+      super("llap", "StatefulSet");
+    }
+
+    /** Returns the autoscaling settings declared on the LLAP spec. */
+    @Override
+    protected AutoscalingSpec getAutoscalingSpec(HiveCluster hiveCluster) {
+      return hiveCluster.getSpec().llap().autoscaling();
+    }
+
+    /** Uses the configured LLAP replica count as the maximum. */
+    @Override
+    protected int getMaxReplicas(HiveCluster hiveCluster) {
+      return hiveCluster.getSpec().llap().replicas();
+    }
+
+    /** Polling interval handed to the base class (presumably seconds -- TODO confirm). */
+    @Override
+    protected int getPollingInterval() {
+      return 5;
+    }
+
+    /**
+     * Builds the HPA behavior block through {@code buildHpaBehavior}.
+     * NOTE(review): the argument meaning is defined by that helper, which is not
+     * visible here. If the fourth argument is the scale-down policy
+     * {@code periodSeconds}, passing {@code cooldownSeconds} there exceeds the
+     * HPA limit of 1800 once the cooldown is configured above 30 minutes --
+     * confirm whether the helper or the CRD schema bounds it.
+     */
+    @Override
+    protected Map<String, Object> getAdvanced(
+        HiveCluster hiveCluster, AutoscalingSpec autoscaling, int maxReplicas) {
+      int cooldown = autoscaling.cooldownSeconds();
+      return buildHpaBehavior(
+          cooldown, "Pods", 1, cooldown,
+          0, "Pods", maxReplicas, 15);
+    }
+
+    /**
+     * Returns the scaling triggers: a Prometheus trigger on the average LLAP
+     * daemon load, followed by the HS2 activation trigger built by
+     * {@code buildHs2ActivationTrigger}.
+     */
+    @Override
+    protected List<Map<String, Object>> getTriggers(
+        HiveCluster hiveCluster, AutoscalingSpec autoscaling,
+        int maxReplicas, String targetName) {
+      String hs2TargetName = hiveCluster.getMetadata().getName() + "-hiveserver2";
+      String namespace = hiveCluster.getMetadata().getNamespace();
+      // Per-pod load = queued requests + (configured executors - available
+      // executors), i.e. queued requests plus busy slots, averaged over the
+      // pods matching targetName. "or vector(0)" yields 0 when no series match.
+      String loadQuery = String.format(
+          "avg("
+              + "hadoop_llapdaemon_executornumqueuedrequests{namespace=\"%1$s\",pod=~\"%2$s-.*\"}"
+              + " + on(pod) hadoop_llapdaemon_executornumexecutorsconfigured{namespace=\"%1$s\",pod=~\"%2$s-.*\"}"
+              + " - on(pod) hadoop_llapdaemon_executornumexecutorsavailable{namespace=\"%1$s\",pod=~\"%2$s-.*\"}"
+              + ") or vector(0)",
+          namespace, targetName);
+      return List.of(
+          Map.of(
+              "type", "prometheus",
+              "metadata", Map.of(
+                  "serverAddress", "http://prometheus-server.monitoring.svc.cluster.local",
+                  "metricName", "llap_total_busy_slots",
+                  "query", loadQuery,
+                  "threshold", String.valueOf(autoscaling.scaleUpThreshold()),
+                  "activationThreshold", "0"
+              )
+          ),
+          buildHs2ActivationTrigger(namespace, hs2TargetName, maxReplicas)
+      );
+    }
+  }
+
+  /**
+   * TezAM ScaledObject: scales on CPU (or pending tasks) + HS2 activation trigger.
+   * Tez AMs run in a warm pool; claimed AMs consume CPU, idle ones do not.
+   */
+  public static class TezAm extends HiveScaledObjectDependent {
+    /** Passes the component name and the scale-target kind to the base class. */
+    public TezAm() {
+      super("tezam", "StatefulSet");
+    }
+
+    /** Returns the autoscaling settings declared on the Tez AM spec. */
+    @Override
+    protected AutoscalingSpec getAutoscalingSpec(HiveCluster hiveCluster) {
+      return hiveCluster.getSpec().tezAm().autoscaling();
+    }
+
+    /** Uses the configured Tez AM replica count as the maximum. */
+    @Override
+    protected int getMaxReplicas(HiveCluster hiveCluster) {
+      return hiveCluster.getSpec().tezAm().replicas();
+    }
+
+    /** Polling interval handed to the base class (presumably seconds -- TODO confirm). */
+    @Override
+    protected int getPollingInterval() {
+      return 5;
+    }
+
+    /**
+     * Builds the HPA behavior block through {@code buildHpaBehavior}.
+     * NOTE(review): the argument meaning is defined by that helper, which is not
+     * visible here. The values read as "scale down 1 pod per 60s after the
+     * cooldown, scale up 2 pods per 30s after 60s" -- confirm against the
+     * helper signature.
+     */
+    @Override
+    protected Map<String, Object> getAdvanced(
+        HiveCluster hiveCluster, AutoscalingSpec autoscaling, int maxReplicas) {
+      return buildHpaBehavior(
+          autoscaling.cooldownSeconds(), "Pods", 1, 60,
+          60, "Pods", 2, 30);
+    }
+
+    /**
+     * Returns the scaling triggers. The first trigger is CPU utilization when
+     * the Tez AM spec declares resources, otherwise a Prometheus trigger on
+     * {@code tez_session_pending_tasks}. The HS2 activation trigger built by
+     * {@code buildHs2ActivationTrigger} is always appended last.
+     */
+    @Override
+    protected List<Map<String, Object>> getTriggers(
+        HiveCluster hiveCluster, AutoscalingSpec autoscaling,
+        int maxReplicas, String targetName) {
+      String hs2TargetName = hiveCluster.getMetadata().getName() + "-hiveserver2";
+      String namespace = hiveCluster.getMetadata().getNamespace();
+      List<Map<String, Object>> triggers = new ArrayList<>();
+      if (hiveCluster.getSpec().tezAm().resources() != null) {
+        triggers.add(Map.of(
+            "type", "cpu",
+            "metricType", "Utilization",
+            "metadata", Map.of(
+                "value", String.valueOf(autoscaling.scaleUpThreshold()),
+                "activationValue", String.valueOf(autoscaling.scaleDownThreshold())
+            )
+        ));
+      } else {
+        // NOTE(review): the pod selector uses the HiveServer2 name, not
+        // targetName (which is unused in this method). Presumably the metric
+        // is exported by the HS2 pods -- confirm, otherwise this trigger
+        // never sees a matching series and always evaluates to vector(0).
+        triggers.add(Map.of(
+            "type", "prometheus",
+            "metadata", Map.of(
+                "serverAddress", "http://prometheus-server.monitoring.svc.cluster.local",
+                "metricName", "tez_session_pending_tasks",
+                "query", String.format(
+                    "sum(tez_session_pending_tasks{namespace=\"%s\",pod=~\"%s-.*\"}) or vector(0)",
+                    namespace, hs2TargetName),
+                "threshold", String.valueOf(autoscaling.scaleUpThreshold()),
+                "activationThreshold", "0"
+            )
+        ));
+      }
+      // Appended once for both branches; the trigger order is unchanged.
+      triggers.add(buildHs2ActivationTrigger(namespace, hs2TargetName, maxReplicas));
+      return triggers;
+    }
+  }
+}
diff --git
a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java deleted file mode 100644 index 9bb0597cc960..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.kubernetes.operator.dependent; - -import java.util.Map; - -import io.fabric8.kubernetes.api.model.ConfigMap; -import io.fabric8.kubernetes.api.model.ConfigMapBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; -import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; -import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** Manages the hive-site.xml ConfigMap for HiveServer2. */ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=hiveserver2," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class HiveServer2ConfigMapDependent - extends HiveDependentResource { - - public static final String COMPONENT = "hiveserver2"; - - public HiveServer2ConfigMapDependent() { - super(ConfigMap.class); - } - - @Override - protected ConfigMap desired(HiveCluster hiveCluster, - Context context) { - HiveClusterSpec spec = hiveCluster.getSpec(); - - Map props = - HiveConfigBuilder.getHiveServer2HiveSite(hiveCluster, spec); - Map tezProps = HiveConfigBuilder.getTezSite(spec); - - return new ConfigMapBuilder() - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) - .endMetadata() - .addToData("hive-site.xml", HadoopXmlBuilder.buildXml(props)) - .addToData("tez-site.xml", HadoopXmlBuilder.buildXml(tezProps)) - .build(); - } - - /** Returns the ConfigMap resource name for this HiveCluster. 
*/ - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-hiveserver2-config"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java index c61383ac0f5a..c08796c218a7 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java @@ -26,8 +26,6 @@ import io.fabric8.kubernetes.api.model.ContainerPort; import io.fabric8.kubernetes.api.model.ContainerPortBuilder; import io.fabric8.kubernetes.api.model.EnvVar; -import io.fabric8.kubernetes.api.model.Lifecycle; -import io.fabric8.kubernetes.api.model.LifecycleBuilder; import io.fabric8.kubernetes.api.model.Probe; import io.fabric8.kubernetes.api.model.apps.Deployment; import io.fabric8.kubernetes.api.model.apps.DeploymentBuilder; @@ -58,6 +56,12 @@ public HiveServer2DeploymentDependent() { super(Deployment.class); } + @Override + protected String getSecondaryResourceName(HiveCluster primary, + Context context) { + return resourceName(primary); + } + @Override protected Deployment desired(HiveCluster hiveCluster, Context context) { @@ -139,13 +143,13 @@ protected Deployment desired(HiveCluster hiveCluster, List ports = new ArrayList<>(); ports.add(new ContainerPortBuilder() .withName("thrift") - .withContainerPort(hs2ThriftPort).build()); + .withContainerPort(hs2ThriftPort).withProtocol("TCP").build()); ports.add(new ContainerPortBuilder() .withName("http") - .withContainerPort(hs2HttpPort).build()); + .withContainerPort(hs2HttpPort).withProtocol("TCP").build()); ports.add(new ContainerPortBuilder() .withName("webui") - .withContainerPort(hs2WebUiPort).build()); + 
.withContainerPort(hs2WebUiPort).withProtocol("TCP").build()); // Probes target the HTTP transport port (default mode) Probe readinessProbe = buildTcpProbe(hs2HttpPort, hs2.readinessProbe(), 15, 10, 3); @@ -165,8 +169,8 @@ protected Deployment desired(HiveCluster hiveCluster, List volumes = new ArrayList<>(); volumes.add(buildProjectedConfigVolume("hive-config", - HiveServer2ConfigMapDependent.resourceName(hiveCluster), - HadoopConfigMapDependent.resourceName(hiveCluster))); + HiveConfigMapDependent.HiveServer2.resourceName(hiveCluster), + HiveConfigMapDependent.Hadoop.resourceName(hiveCluster))); if (tezAmEnabled) { volumeMounts.add( @@ -211,18 +215,12 @@ protected Deployment desired(HiveCluster hiveCluster, HadoopXmlBuilder.buildXml(HiveConfigBuilder.getTezSite(spec)), HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); - // When autoscaling is enabled and the Deployment already exists, preserve the current - // replica count (managed by KEDA/HPA). On initial creation: - // - minReplicas == 0 (scale-to-zero): start at 0, KEDA HTTPScaledObject handles wake-up - // - minReplicas > 0: start at configured replicas - boolean autoscalingEnabled = hs2.autoscaling() != null && hs2.autoscaling().isEnabled(); - Integer replicas = hs2.replicas(); - if (autoscalingEnabled) { - int initialReplicas = hs2.autoscaling().minReplicas() == 0 ? 0 : hs2.replicas(); - replicas = getSecondaryResource(hiveCluster, context) - .map(d -> d.getSpec().getReplicas()) - .orElse(initialReplicas); - } + // When autoscaling is enabled, preserve current replica count (KEDA/HPA manages it). + AutoscalingSpec hs2Autoscaling = hs2.autoscaling(); + int initialReplicas = hs2Autoscaling != null && hs2Autoscaling.minReplicas() == 0 + ? 
0 : hs2.replicas(); + Integer replicas = resolveReplicaCount( + hiveCluster, context, hs2Autoscaling, hs2.replicas(), initialReplicas); Deployment deployment = new DeploymentBuilder() .withNewMetadata() @@ -264,71 +262,24 @@ protected Deployment desired(HiveCluster hiveCluster, deployment.getSpec().getTemplate().getSpec(), selectorLabels); // Graceful scale-down: deregister from ZK, then poll JMX Exporter (port 9404) for sessions. - // Uses flat Prometheus text format — same metric KEDA reads — not brittle JSON parsing. if (autoscaling.isEnabled()) { - String preStopScript = String.join("\n", - "#!/bin/bash", + List zkDeregister = List.of( "echo '[preStop] Deregistering HiveServer2 from ZooKeeper...'", - "hive --service hiveserver2 --deregister || echo '[preStop] WARNING: ZK deregister failed'", - "echo '[preStop] Waiting for open sessions to drain (polling localhost:9404/metrics)...'", - "RETRIES=0", - "while true; do", - " RESPONSE=$(curl -sf http://localhost:9404/metrics)", - " if [ $? -ne 0 ]; then", - " RETRIES=$((RETRIES+1))", - " echo \"[preStop] ERROR: JMX Exporter unreachable on port 9404 (attempt $RETRIES)\"", - " if [ $RETRIES -ge 6 ]; then", - " echo '[preStop] JMX Exporter not responding after 30s. Proceeding with shutdown.'", - " break", - " fi", - " sleep 5; continue", - " fi", - " SESSIONS=$(echo \"$RESPONSE\" | grep '^hs2_open_sessions ' | awk '{print $2}')", - " if [ -z \"$SESSIONS\" ]; then", - " echo '[preStop] WARNING: hs2_open_sessions metric not found. JMX Exporter may not be configured.'", - " break", - " fi", - " if [ \"${SESSIONS%.*}\" -le 0 ] 2>/dev/null; then", - " echo '[preStop] All sessions drained. 
Shutting down.'", - " break", - " fi", - " echo \"[preStop] hs2_open_sessions=$SESSIONS — waiting...\"", - " RETRIES=0", - " sleep 5", - "done"); - Lifecycle lifecycle = new LifecycleBuilder() - .withNewPreStop() - .withNewExec() - .withCommand("/bin/bash", "-c", preStopScript) - .endExec() - .endPreStop() - .build(); - deployment.getSpec().getTemplate().getSpec().getContainers().get(0).setLifecycle(lifecycle); - deployment.getSpec().getTemplate().getSpec() - .setTerminationGracePeriodSeconds((long) autoscaling.gracePeriodSeconds()); - // Prometheus scrape annotations for JMX Exporter metrics endpoint - deployment.getSpec().getTemplate().getMetadata().getAnnotations() - .put("prometheus.io/scrape", "true"); - deployment.getSpec().getTemplate().getMetadata().getAnnotations() - .put("prometheus.io/port", String.valueOf(ConfigUtils.PROMETHEUS_JMX_EXPORTER_PORT)); - deployment.getSpec().getTemplate().getMetadata().getAnnotations() - .put("prometheus.io/path", "/metrics"); + "hive --service hiveserver2 --deregister || echo '[preStop] WARNING: ZK deregister failed'"); + String preStopScript = buildDrainScript( + "Waiting for open sessions to drain", + "hs2_open_sessions", "SESSIONS", + "All sessions drained. 
Shutting down.", + 5, 6, zkDeregister); + applyAutoscalingLifecycle( + deployment.getSpec().getTemplate().getSpec(), + deployment.getSpec().getTemplate().getMetadata(), + preStopScript, autoscaling.gracePeriodSeconds()); } - if (spec.volumes() != null) { - deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); - } - if (spec.volumeMounts() != null) { - deployment.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() - .addAll(spec.volumeMounts()); - } - if (hs2.extraVolumes() != null) { - deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(hs2.extraVolumes()); - } - if (hs2.extraVolumeMounts() != null) { - deployment.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() - .addAll(hs2.extraVolumeMounts()); - } + appendUserVolumes(deployment.getSpec().getTemplate().getSpec(), + spec.volumes(), spec.volumeMounts(), + hs2.extraVolumes(), hs2.extraVolumeMounts()); return deployment; } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2HttpScaledObjectDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2HttpScaledObjectDependent.java index 74794ad8e16a..055bd878d2f3 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2HttpScaledObjectDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2HttpScaledObjectDependent.java @@ -70,11 +70,12 @@ protected GenericKubernetesResource desired(HiveCluster hiveCluster, Map spec = new HashMap<>(); // Hosts the interceptor matches for routing. - // Uses internal service DNS names (Ingress rewrites Host header to match these) - // plus localhost for kubectl port-forward scenarios. + // Includes: internal service FQDN, short name, interceptor proxy name + // (for in-cluster kubectl exec), and localhost (for port-forward). 
spec.put("hosts", List.of( serviceName + "." + namespace + ".svc.cluster.local", serviceName, + "keda-add-ons-http-interceptor-proxy.keda.svc", "localhost" )); spec.put("pathPrefixes", List.of("/")); diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2InterceptorRouteDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2InterceptorRouteDependent.java new file mode 100644 index 000000000000..de6e3bb71d5c --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2InterceptorRouteDependent.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import io.fabric8.kubernetes.api.model.GenericKubernetesResource; +import io.fabric8.kubernetes.api.model.GenericKubernetesResourceBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.GroupVersionKind; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Manages a KEDA InterceptorRoute for HiveServer2 scale-to-zero routing. + *

+ * Unlike HTTPScaledObject, InterceptorRoute only configures interceptor + * routing without auto-creating a ScaledObject. This allows us to manage + * scaling entirely through a single Prometheus-based ScaledObject that + * combines session/CPU awareness with the HTTP interceptor wake-from-zero + * trigger. + *

+ * Requires the KEDA HTTP Add-on to be installed in the cluster.
+ */
+public class HiveServer2InterceptorRouteDependent extends HiveGenericDependentResource {
+
+  /** API coordinates of the InterceptorRoute kind, shared by the constructor and desired(). */
+  private static final String API_GROUP = "http.keda.sh";
+  private static final String API_VERSION = "v1beta1";
+  private static final String KIND = "InterceptorRoute";
+
+  public HiveServer2InterceptorRouteDependent() {
+    super(new GroupVersionKind(API_GROUP, API_VERSION, KIND));
+  }
+
+  /**
+   * Builds the desired InterceptorRoute: it targets the HiveServer2 service on
+   * its HTTP transport port, matches every path for the listed hosts, and uses
+   * the autoscaling scale-up threshold as the concurrency target.
+   * NOTE(review): the autoscaling spec is dereferenced without a null check;
+   * presumably a reconcile precondition guarantees it is set -- confirm.
+   */
+  @Override
+  protected GenericKubernetesResource desired(HiveCluster hiveCluster,
+      Context<HiveCluster> context) {
+    AutoscalingSpec autoscaling = hiveCluster.getSpec().hiveServer2().autoscaling();
+    String clusterName = hiveCluster.getMetadata().getName();
+    String namespace = hiveCluster.getMetadata().getNamespace();
+    String serviceName = clusterName + "-hiveserver2";
+
+    // HTTP port: the configOverrides value when present, else the default.
+    int httpPort = ConfigUtils.getInt(
+        hiveCluster.getSpec().hiveServer2().configOverrides(),
+        ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_KEY,
+        null, ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_DEFAULT);
+
+    // Hosts the interceptor matches for routing
+    List<String> hosts = new ArrayList<>(List.of(
+        serviceName + "." + namespace + ".svc.cluster.local",
+        serviceName,
+        "keda-add-ons-http-interceptor-proxy.keda.svc",
+        "localhost"
+    ));
+
+    Map<String, Object> spec = new HashMap<>();
+
+    // Target backend service
+    spec.put("target", Map.of(
+        "service", serviceName,
+        "port", httpPort
+    ));
+
+    // Routing rules
+    spec.put("rules", List.of(
+        Map.of(
+            "hosts", hosts,
+            "paths", List.of(Map.of("value", "/"))
+        )
+    ));
+
+    // Scaling metric (required field, used by interceptor for queue management)
+    spec.put("scalingMetric", Map.of(
+        "concurrency", Map.of(
+            "targetValue", autoscaling.scaleUpThreshold()
+        )
+    ));
+
+    return new GenericKubernetesResourceBuilder()
+        .withApiVersion(API_GROUP + "/" + API_VERSION)
+        .withKind(KIND)
+        .withNewMetadata()
+          .withName(resourceName(hiveCluster))
+          .withNamespace(namespace)
+          .withLabels(Labels.forComponent(hiveCluster, "hiveserver2"))
+        .endMetadata()
+        .withAdditionalProperties(Map.of("spec", spec))
+        .build();
+  }
+
+  /** Returns the same name as {@link #resourceName(HiveCluster)}. */
+  @Override
+  protected String getResourceName(HiveCluster hiveCluster) {
+    return resourceName(hiveCluster);
+  }
+
+  /** Returns the InterceptorRoute resource name for this HiveCluster. */
+  public static String resourceName(HiveCluster hiveCluster) {
+    return hiveCluster.getMetadata().getName() + "-hiveserver2-route";
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2PdbDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2PdbDependent.java
deleted file mode 100644
index 054881f9644d..000000000000
--- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2PdbDependent.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -package org.apache.hive.kubernetes.operator.dependent; - -import io.fabric8.kubernetes.api.model.IntOrString; -import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudget; -import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudgetBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.CRUDKubernetesDependentResource; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** - * PodDisruptionBudget for HiveServer2. - * Ensures at least one HS2 pod remains available during voluntary disruptions - * (scale-down, node drain, rolling updates) to prevent query failures. - */ -public class HiveServer2PdbDependent - extends CRUDKubernetesDependentResource { - - public HiveServer2PdbDependent() { - super(PodDisruptionBudget.class); - } - - @Override - protected PodDisruptionBudget desired(HiveCluster hiveCluster, - Context context) { - return new PodDisruptionBudgetBuilder() - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, "hiveserver2")) - .endMetadata() - .withNewSpec() - .withMinAvailable(new IntOrString(1)) - .withNewSelector() - .withMatchLabels(Labels.selectorForComponent(hiveCluster, "hiveserver2")) - .endSelector() - .endSpec() - .build(); - } - - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-hiveserver2-pdb"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ScaledObjectDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ScaledObjectDependent.java deleted file mode 100644 index 196d97ce8b8f..000000000000 --- 
a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ScaledObjectDependent.java +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import io.fabric8.kubernetes.api.model.GenericKubernetesResource; -import io.fabric8.kubernetes.api.model.GenericKubernetesResourceBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.processing.GroupVersionKind; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** - * Manages a KEDA ScaledObject for HiveServer2 autoscaling. - *

- * Scale-up triggers (OR): - * - hs2_active_sessions > scaleUpThreshold% of hive.server2.session.max (1 min) - * - Pod CPU > 75% - *

- * Scale-down triggers (AND): - * - hs2_open_sessions < scaleDownThreshold% of max - * - CPU < 30% - *

- * Cooldown: configurable (default 600s / 10 minutes) - */ -public class HiveServer2ScaledObjectDependent extends HiveGenericDependentResource { - - public HiveServer2ScaledObjectDependent() { - super(new GroupVersionKind("keda.sh", "v1alpha1", "ScaledObject")); - } - - @Override - protected GenericKubernetesResource desired(HiveCluster hiveCluster, - Context context) { - AutoscalingSpec autoscaling = hiveCluster.getSpec().hiveServer2().autoscaling(); - int maxReplicas = hiveCluster.getSpec().hiveServer2().replicas(); - String targetName = hiveCluster.getMetadata().getName() + "-hiveserver2"; - - Map spec = new HashMap<>(); - spec.put("scaleTargetRef", Map.of( - "apiVersion", "apps/v1", - "kind", "Deployment", - "name", targetName - )); - // KEDA requires idleReplicaCount < minReplicaCount. - // For scale-to-zero: min=1 (minimum when active), idle=0 (scale to zero when idle). - int minReplicaCount = Math.max(1, autoscaling.minReplicas()); - spec.put("minReplicaCount", minReplicaCount); - spec.put("maxReplicaCount", maxReplicas); - if (autoscaling.minReplicas() == 0) { - spec.put("idleReplicaCount", 0); - } - spec.put("cooldownPeriod", autoscaling.cooldownSeconds()); - spec.put("pollingInterval", 30); - - // Advanced scaling policy: scale down one pod at a time for graceful drain - spec.put("advanced", Map.of( - "horizontalPodAutoscalerConfig", Map.of( - "behavior", Map.of( - "scaleDown", Map.of( - "stabilizationWindowSeconds", autoscaling.cooldownSeconds(), - "policies", List.of(Map.of( - "type", "Pods", - "value", 1, - "periodSeconds", 60 - )) - ), - "scaleUp", Map.of( - "stabilizationWindowSeconds", 60, - "policies", List.of(Map.of( - "type", "Percent", - "value", 100, - "periodSeconds", 60 - )) - ) - ) - ) - )); - - // Triggers: Prometheus for hs2_active_sessions + CPU fallback (only when CPU requests defined) - // "or vector(0)" ensures the query returns 0 (not empty) when HS2 has no pods. 
- List> triggers = new ArrayList<>(); - triggers.add(Map.of( - "type", "prometheus", - "metadata", Map.of( - "serverAddress", "http://prometheus-server.monitoring.svc.cluster.local", - "metricName", "hs2_active_sessions", - "query", String.format( - "avg(hs2_active_sessions{namespace=\"%s\",pod=~\"%s-.*\"}) or vector(0)", - hiveCluster.getMetadata().getNamespace(), targetName), - "threshold", String.valueOf(autoscaling.scaleUpThreshold()), - "activationThreshold", "0" - ) - )); - if (hiveCluster.getSpec().hiveServer2().resources() != null) { - // activationValue prevents idle JVM CPU from keeping the ScaledObject active. - triggers.add(Map.of( - "type", "cpu", - "metricType", "Utilization", - "metadata", Map.of( - "value", "75", - "activationValue", String.valueOf(autoscaling.scaleDownThreshold()) - ) - )); - } - spec.put("triggers", triggers); - - return new GenericKubernetesResourceBuilder() - .withApiVersion("keda.sh/v1alpha1") - .withKind("ScaledObject") - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, "hiveserver2")) - .endMetadata() - .withAdditionalProperties(Map.of("spec", spec)) - .build(); - } - - @Override - protected String getResourceName(HiveCluster hiveCluster) { - return resourceName(hiveCluster); - } - - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-hiveserver2-scaledobject"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java deleted file mode 100644 index 13b218986e67..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software 
Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import io.fabric8.kubernetes.api.model.IntOrString; -import io.fabric8.kubernetes.api.model.Service; -import io.fabric8.kubernetes.api.model.ServiceBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.model.spec.HiveServer2Spec; -import org.apache.hive.kubernetes.operator.util.ConfigUtils; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** Manages the Kubernetes Service for HiveServer2 (Thrift and WebUI ports). 
*/ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=hiveserver2," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class HiveServer2ServiceDependent - extends HiveDependentResource { - - public HiveServer2ServiceDependent() { - super(Service.class); - } - - @Override - protected Service desired(HiveCluster hiveCluster, - Context context) { - HiveServer2Spec hs2 = hiveCluster.getSpec().hiveServer2(); - int thriftPort = ConfigUtils.getInt(hs2.configOverrides(), - ConfigUtils.HIVE_SERVER2_THRIFT_PORT_KEY, - null, ConfigUtils.HIVE_SERVER2_THRIFT_PORT_DEFAULT); - int httpPort = ConfigUtils.getInt(hs2.configOverrides(), - ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_KEY, - null, ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_DEFAULT); - int webUiPort = ConfigUtils.getInt(hs2.configOverrides(), - ConfigUtils.HIVE_SERVER2_WEBUI_PORT_KEY, - null, ConfigUtils.HIVE_SERVER2_WEBUI_PORT_DEFAULT); - - return new ServiceBuilder() - .withNewMetadata() - .withName(hiveCluster.getMetadata().getName() + "-hiveserver2") - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, - HiveServer2DeploymentDependent.COMPONENT)) - .endMetadata() - .withNewSpec() - .withType(hs2.serviceType()) - .withSelector(Labels.selectorForComponent(hiveCluster, - HiveServer2DeploymentDependent.COMPONENT)) - .addNewPort() - .withName("thrift") - .withPort(thriftPort) - .withTargetPort(new IntOrString(thriftPort)) - .endPort() - .addNewPort() - .withName("http") - .withPort(httpPort) - .withTargetPort(new IntOrString(httpPort)) - .endPort() - .addNewPort() - .withName("webui") - .withPort(webUiPort) - .withTargetPort(new IntOrString(webUiPort)) - .endPort() - .endSpec() - .build(); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServiceDependent.java 
b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServiceDependent.java new file mode 100644 index 000000000000..f3a064f48b95 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServiceDependent.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import io.fabric8.kubernetes.api.model.IntOrString; +import io.fabric8.kubernetes.api.model.Service; +import io.fabric8.kubernetes.api.model.ServiceBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Unified Kubernetes Service dependent for all Hive components. + * Subclassed per component to define component-specific service type and ports. 
+ */ +public abstract class HiveServiceDependent + extends HiveDependentResource { + + private final String component; + + protected HiveServiceDependent(String component) { + super(Service.class); + this.component = component; + } + + @Override + protected String getSecondaryResourceName(HiveCluster primary, + Context context) { + return primary.getMetadata().getName() + "-" + component; + } + + @Override + protected Service desired(HiveCluster hiveCluster, + Context context) { + ServiceBuilder builder = new ServiceBuilder() + .withNewMetadata() + .withName(hiveCluster.getMetadata().getName() + "-" + component) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, component)) + .endMetadata() + .withNewSpec() + .withSelector(Labels.selectorForComponent(hiveCluster, component)) + .endSpec(); + customizeSpec(builder, hiveCluster); + return builder.build(); + } + + /** Subclasses override to set service type and add ports. */ + protected abstract void customizeSpec(ServiceBuilder builder, HiveCluster hiveCluster); + + /** HiveServer2 Service: configurable type, thrift + http + webui ports. 
*/ + @KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=hiveserver2," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") + ) + public static class HiveServer2 extends HiveServiceDependent { + public HiveServer2() { super("hiveserver2"); } + + @Override + protected void customizeSpec(ServiceBuilder builder, HiveCluster hiveCluster) { + var hs2 = hiveCluster.getSpec().hiveServer2(); + int thriftPort = ConfigUtils.getInt(hs2.configOverrides(), + ConfigUtils.HIVE_SERVER2_THRIFT_PORT_KEY, + null, ConfigUtils.HIVE_SERVER2_THRIFT_PORT_DEFAULT); + int httpPort = ConfigUtils.getInt(hs2.configOverrides(), + ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_KEY, + null, ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_DEFAULT); + int webUiPort = ConfigUtils.getInt(hs2.configOverrides(), + ConfigUtils.HIVE_SERVER2_WEBUI_PORT_KEY, + null, ConfigUtils.HIVE_SERVER2_WEBUI_PORT_DEFAULT); + builder.editSpec() + .withType(hs2.serviceType()) + .addNewPort().withName("thrift").withProtocol("TCP") + .withPort(thriftPort).withTargetPort(new IntOrString(thriftPort)).endPort() + .addNewPort().withName("http").withProtocol("TCP") + .withPort(httpPort).withTargetPort(new IntOrString(httpPort)).endPort() + .addNewPort().withName("webui").withProtocol("TCP") + .withPort(webUiPort).withTargetPort(new IntOrString(webUiPort)).endPort() + .endSpec(); + } + } + + /** Metastore Service: ClusterIP, thrift + rest ports. 
*/ + @KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=metastore," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") + ) + public static class Metastore extends HiveServiceDependent { + public Metastore() { super("metastore"); } + + @Override + protected void customizeSpec(ServiceBuilder builder, HiveCluster hiveCluster) { + int thriftPort = ConfigUtils.getInt( + hiveCluster.getSpec().metastore().configOverrides(), + ConfigUtils.METASTORE_THRIFT_PORT_KEY, + ConfigUtils.METASTORE_THRIFT_PORT_HIVE_KEY, + ConfigUtils.METASTORE_THRIFT_PORT_DEFAULT); + builder.editSpec() + .withType("ClusterIP") + .addNewPort().withName("thrift").withProtocol("TCP") + .withPort(thriftPort).withTargetPort(new IntOrString(thriftPort)).endPort() + .addNewPort().withName("rest").withProtocol("TCP") + .withPort(9001).withTargetPort(new IntOrString(9001)).endPort() + .endSpec(); + } + } + + /** LLAP headless Service: required by StatefulSet for stable DNS. */ + @KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=llap," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") + ) + public static class Llap extends HiveServiceDependent { + public Llap() { super("llap"); } + + @Override + protected void customizeSpec(ServiceBuilder builder, HiveCluster hiveCluster) { + builder.editSpec() + .withClusterIP("None") + .addNewPort().withName("management").withProtocol("TCP") + .withPort(15004).withTargetPort(new IntOrString(15004)).endPort() + .addNewPort().withName("shuffle").withProtocol("TCP") + .withPort(15551).withTargetPort(new IntOrString(15551)).endPort() + .addNewPort().withName("web").withProtocol("TCP") + .withPort(15002).withTargetPort(new IntOrString(15002)).endPort() + .endSpec(); + } + } + + /** TezAM headless Service: required by StatefulSet for stable DNS. 
*/ + @KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=tezam," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") + ) + public static class TezAm extends HiveServiceDependent { + public TezAm() { super("tezam"); } + + @Override + protected void customizeSpec(ServiceBuilder builder, HiveCluster hiveCluster) { + builder.editSpec() + .withClusterIP("None") + .endSpec(); + } + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java deleted file mode 100644 index 2ad6955dadb8..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.kubernetes.operator.dependent; - -import java.util.Map; - -import io.fabric8.kubernetes.api.model.ConfigMap; -import io.fabric8.kubernetes.api.model.ConfigMapBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; -import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** Manages the llap-daemon-site.xml ConfigMap for LLAP daemons. */ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=llap," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class LlapConfigMapDependent - extends HiveDependentResource { - - public static final String COMPONENT = "llap"; - - public LlapConfigMapDependent() { - super(ConfigMap.class); - } - - @Override - protected ConfigMap desired(HiveCluster hiveCluster, - Context context) { - Map props = - HiveConfigBuilder.getLlapDaemonSite(hiveCluster.getSpec()); - - return new ConfigMapBuilder() - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) - .endMetadata() - .addToData("llap-daemon-site.xml", - HadoopXmlBuilder.buildXml(props)) - .build(); - } - - /** Returns the ConfigMap resource name for this HiveCluster. 
*/ - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-llap-config"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapPdbDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapPdbDependent.java deleted file mode 100644 index 1f077751aa61..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapPdbDependent.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import io.fabric8.kubernetes.api.model.IntOrString; -import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudget; -import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudgetBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.CRUDKubernetesDependentResource; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** - * PodDisruptionBudget for LLAP daemons. 
- * Ensures at least one LLAP daemon remains available during voluntary disruptions - * to prevent query failures and cache loss. - */ -public class LlapPdbDependent - extends CRUDKubernetesDependentResource { - - public LlapPdbDependent() { - super(PodDisruptionBudget.class); - } - - @Override - protected PodDisruptionBudget desired(HiveCluster hiveCluster, - Context context) { - return new PodDisruptionBudgetBuilder() - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, "llap")) - .endMetadata() - .withNewSpec() - .withMinAvailable(new IntOrString(1)) - .withNewSelector() - .withMatchLabels(Labels.selectorForComponent(hiveCluster, "llap")) - .endSelector() - .endSpec() - .build(); - } - - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-llap-pdb"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapScaledObjectDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapScaledObjectDependent.java deleted file mode 100644 index 7f6886a594df..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapScaledObjectDependent.java +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import io.fabric8.kubernetes.api.model.GenericKubernetesResource; -import io.fabric8.kubernetes.api.model.GenericKubernetesResourceBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.processing.GroupVersionKind; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** - * Manages a KEDA ScaledObject for LLAP daemon autoscaling. - *

- * Scale-up trigger: - * - NumQueuedRequests > 0 for 1 minute (queue non-empty means all executors are busy) - *

- * Scale-down trigger: - * - NumExecutorsAvailable == NumExecutors (daemon completely idle) - *

- * Cooldown: configurable (default 900s / 15 minutes — scaling down destroys in-memory cache) - */ -public class LlapScaledObjectDependent extends HiveGenericDependentResource { - - public LlapScaledObjectDependent() { - super(new GroupVersionKind("keda.sh", "v1alpha1", "ScaledObject")); - } - - @Override - protected GenericKubernetesResource desired(HiveCluster hiveCluster, - Context context) { - AutoscalingSpec autoscaling = hiveCluster.getSpec().llap().autoscaling(); - int maxReplicas = hiveCluster.getSpec().llap().replicas(); - String targetName = hiveCluster.getMetadata().getName() + "-llap"; - - Map spec = new HashMap<>(); - spec.put("scaleTargetRef", Map.of( - "apiVersion", "apps/v1", - "kind", "StatefulSet", - "name", targetName - )); - // KEDA requires idleReplicaCount < minReplicaCount. - // For scale-to-zero: min=1 (minimum when active), idle=0 (scale to zero when idle). - int minReplicaCount = Math.max(1, autoscaling.minReplicas()); - spec.put("minReplicaCount", minReplicaCount); - spec.put("maxReplicaCount", maxReplicas); - if (autoscaling.minReplicas() == 0) { - spec.put("idleReplicaCount", 0); - } - spec.put("cooldownPeriod", autoscaling.cooldownSeconds()); - spec.put("pollingInterval", 5); - - // LLAP scale-up is aggressive: when queries need daemons, scale immediately to max. - // Scale down is slow (1 pod per cooldown) to preserve in-memory cache. - spec.put("advanced", Map.of( - "horizontalPodAutoscalerConfig", Map.of( - "behavior", Map.of( - "scaleDown", Map.of( - "stabilizationWindowSeconds", autoscaling.cooldownSeconds(), - "policies", List.of(Map.of( - "type", "Pods", - "value", 1, - "periodSeconds", autoscaling.cooldownSeconds() - )) - ), - "scaleUp", Map.of( - "stabilizationWindowSeconds", 0, - "policies", List.of(Map.of( - "type", "Pods", - "value", maxReplicas, - "periodSeconds", 15 - )) - ) - ) - ) - )); - - // Triggers: - // 1. Prometheus for NumQueuedRequests — drives proportional scaling. 
- // More queued requests = more LLAP daemons needed. Scales up to max. - // 2. HS2 open sessions — activation only (wake from 0→1). - // Threshold set to maxReplicas so desired = 1/max ≈ 1 (never drives above min). - // activationThreshold=0 ensures any session activates the ScaledObject. - // - // Scale-down: HPA policy removes 1 pod per cooldown period (preserves cache). - // Idle (all sessions closed + no queued requests): after cooldownPeriod → 0. - // "or vector(0)" ensures queries return 0 (not empty) when pods don't exist. - String hs2TargetName = hiveCluster.getMetadata().getName() + "-hiveserver2"; - String namespace = hiveCluster.getMetadata().getNamespace(); - spec.put("triggers", List.of( - Map.of( - "type", "prometheus", - "metadata", Map.of( - "serverAddress", "http://prometheus-server.monitoring.svc.cluster.local", - "metricName", "llap_num_queued_requests", - "query", String.format( - "avg(hadoop_llapdaemon_executornumqueuedrequests{namespace=\"%s\",pod=~\"%s-.*\"}) or vector(0)", - namespace, targetName), - "threshold", String.valueOf(autoscaling.scaleUpThreshold()), - "activationThreshold", "0" - ) - ), - Map.of( - "type", "prometheus", - "metadata", Map.of( - "serverAddress", "http://prometheus-server.monitoring.svc.cluster.local", - "metricName", "hs2_open_sessions_activation", - "query", String.format( - "(max(hs2_open_sessions{namespace=\"%s\",pod=~\"%s-.*\"}) > bool 0) or vector(0)", - namespace, hs2TargetName), - "threshold", String.valueOf(maxReplicas), - "activationThreshold", "0" - ) - ) - )); - - return new GenericKubernetesResourceBuilder() - .withApiVersion("keda.sh/v1alpha1") - .withKind("ScaledObject") - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, "llap")) - .endMetadata() - .withAdditionalProperties(Map.of("spec", spec)) - .build(); - } - - @Override - protected String getResourceName(HiveCluster hiveCluster) { - 
return resourceName(hiveCluster); - } - - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-llap-scaledobject"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java deleted file mode 100644 index 108f29347a97..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.kubernetes.operator.dependent; - -import io.fabric8.kubernetes.api.model.IntOrString; -import io.fabric8.kubernetes.api.model.Service; -import io.fabric8.kubernetes.api.model.ServiceBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** - * Manages the headless Kubernetes Service for LLAP daemons. - * Required by the StatefulSet for stable DNS entries and ZooKeeper registration. - */ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=llap," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class LlapServiceDependent - extends HiveDependentResource { - - public LlapServiceDependent() { - super(Service.class); - } - - @Override - protected Service desired(HiveCluster hiveCluster, - Context context) { - return new ServiceBuilder() - .withNewMetadata() - .withName(hiveCluster.getMetadata().getName() + "-llap") - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, - LlapStatefulSetDependent.COMPONENT)) - .endMetadata() - .withNewSpec() - .withClusterIP("None") - .withSelector(Labels.selectorForComponent(hiveCluster, - LlapStatefulSetDependent.COMPONENT)) - .addNewPort() - .withName("management") - .withPort(15004) - .withTargetPort(new IntOrString(15004)) - .endPort() - .addNewPort() - .withName("shuffle") - .withPort(15551) - .withTargetPort(new IntOrString(15551)) - .endPort() - .addNewPort() - .withName("web") - .withPort(15002) - .withTargetPort(new IntOrString(15002)) - .endPort() - .endSpec() - .build(); - } -} diff --git 
a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java index 35fcbb8ac171..7bd1473afefc 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java @@ -26,8 +26,6 @@ import io.fabric8.kubernetes.api.model.ContainerPort; import io.fabric8.kubernetes.api.model.ContainerPortBuilder; import io.fabric8.kubernetes.api.model.EnvVar; -import io.fabric8.kubernetes.api.model.Lifecycle; -import io.fabric8.kubernetes.api.model.LifecycleBuilder; import io.fabric8.kubernetes.api.model.Probe; import io.fabric8.kubernetes.api.model.apps.StatefulSet; import io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder; @@ -38,7 +36,6 @@ import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; import org.apache.hive.kubernetes.operator.model.spec.LlapSpec; -import org.apache.hive.kubernetes.operator.util.ConfigUtils; import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; import org.apache.hive.kubernetes.operator.util.Labels; @@ -60,6 +57,12 @@ public LlapStatefulSetDependent() { super(StatefulSet.class); } + @Override + protected String getSecondaryResourceName(HiveCluster primary, + Context context) { + return resourceName(primary); + } + @Override protected StatefulSet desired(HiveCluster hiveCluster, Context context) { @@ -87,13 +90,13 @@ protected StatefulSet desired(HiveCluster hiveCluster, List ports = new ArrayList<>(); ports.add(new ContainerPortBuilder() - .withName("management").withContainerPort(15004).build()); + 
.withName("management").withContainerPort(15004).withProtocol("TCP").build()); ports.add(new ContainerPortBuilder() - .withName("shuffle").withContainerPort(15551).build()); + .withName("shuffle").withContainerPort(15551).withProtocol("TCP").build()); ports.add(new ContainerPortBuilder() - .withName("web").withContainerPort(15002).build()); + .withName("web").withContainerPort(15002).withProtocol("TCP").build()); ports.add(new ContainerPortBuilder() - .withName("output").withContainerPort(15003).build()); + .withName("output").withContainerPort(15003).withProtocol("TCP").build()); Probe readinessProbe = buildTcpProbe(15004, llap.readinessProbe(), 15, 10, 3); @@ -109,8 +112,8 @@ protected StatefulSet desired(HiveCluster hiveCluster, List volumes = new ArrayList<>(); volumes.add(buildProjectedConfigVolume("llap-config", - LlapConfigMapDependent.resourceName(hiveCluster), - HadoopConfigMapDependent.resourceName(hiveCluster))); + HiveConfigMapDependent.Llap.resourceName(hiveCluster), + HiveConfigMapDependent.Hadoop.resourceName(hiveCluster))); List initContainers = new ArrayList<>(); addExternalJars(spec.image(), spec.externalJars(), @@ -130,18 +133,12 @@ protected StatefulSet desired(HiveCluster hiveCluster, HadoopXmlBuilder.buildXml(HiveConfigBuilder.getLlapDaemonSite(spec)), HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); - // When autoscaling is enabled and the StatefulSet already exists, preserve the current - // replica count (managed by KEDA/HPA). On initial creation: - // - minReplicas == 0: start at 0, KEDA scales up when hs2_active_sessions > 0 - // - minReplicas > 0: start at configured replicas - boolean autoscalingEnabled = llap.autoscaling() != null && llap.autoscaling().isEnabled(); - Integer replicas = llap.replicas(); - if (autoscalingEnabled) { - int initialReplicas = llap.autoscaling().minReplicas() == 0 ? 
0 : llap.replicas(); - replicas = getSecondaryResource(hiveCluster, context) - .map(s -> s.getSpec().getReplicas()) - .orElse(initialReplicas); - } + // When autoscaling is enabled, preserve current replica count (KEDA/HPA manages it). + AutoscalingSpec llapAutoscaling = llap.autoscaling(); + int initialReplicas = llapAutoscaling != null && llapAutoscaling.minReplicas() == 0 + ? 0 : llap.replicas(); + Integer replicas = resolveReplicaCount( + hiveCluster, context, llapAutoscaling, llap.replicas(), initialReplicas); StatefulSet statefulSet = new StatefulSetBuilder() .withNewMetadata() @@ -183,70 +180,24 @@ protected StatefulSet desired(HiveCluster hiveCluster, statefulSet.getSpec().getTemplate().getSpec(), selectorLabels); // Graceful scale-down: poll JMX Exporter (port 9404) until all executors idle. - // Uses flat Prometheus text format — same metrics KEDA reads — not brittle JSON parsing. if (autoscaling.isEnabled()) { - String preStopScript = String.join("\n", - "#!/bin/bash", - "echo '[preStop] Waiting for LLAP executors to become idle (polling localhost:9404/metrics)...'", - "RETRIES=0", - "while true; do", - " RESPONSE=$(curl -sf http://localhost:9404/metrics)", - " if [ $? -ne 0 ]; then", - " RETRIES=$((RETRIES+1))", - " echo \"[preStop] ERROR: JMX Exporter unreachable on port 9404 (attempt $RETRIES)\"", - " if [ $RETRIES -ge 6 ]; then", - " echo '[preStop] JMX Exporter not responding after 60s. Proceeding with shutdown.'", - " break", - " fi", - " sleep 10; continue", - " fi", - " AVAILABLE=$(echo \"$RESPONSE\" | grep '^hadoop_llapdaemon_executornumexecutorsavailable{' | awk '{print $2}')", - " TOTAL=$(echo \"$RESPONSE\" | grep '^hadoop_llapdaemon_executornumexecutors{' | awk '{print $2}')", - " if [ -z \"$AVAILABLE\" ] || [ -z \"$TOTAL\" ]; then", - " echo '[preStop] WARNING: LLAP executor metrics not found. 
JMX Exporter may not be configured.'", - " break", - " fi", - " if [ \"${AVAILABLE%.*}\" -ge \"${TOTAL%.*}\" ] 2>/dev/null; then", - " echo '[preStop] All executors idle. Shutting down.'", - " break", - " fi", - " echo \"[preStop] Executors available=$AVAILABLE / total=$TOTAL — waiting...\"", - " RETRIES=0", - " sleep 10", - "done"); - Lifecycle lifecycle = new LifecycleBuilder() - .withNewPreStop() - .withNewExec() - .withCommand("/bin/bash", "-c", preStopScript) - .endExec() - .endPreStop() - .build(); - statefulSet.getSpec().getTemplate().getSpec().getContainers().get(0).setLifecycle(lifecycle); - statefulSet.getSpec().getTemplate().getSpec() - .setTerminationGracePeriodSeconds((long) autoscaling.gracePeriodSeconds()); - // Prometheus scrape annotations for JMX Exporter metrics endpoint - statefulSet.getSpec().getTemplate().getMetadata().getAnnotations() - .put("prometheus.io/scrape", "true"); - statefulSet.getSpec().getTemplate().getMetadata().getAnnotations() - .put("prometheus.io/port", String.valueOf(ConfigUtils.PROMETHEUS_JMX_EXPORTER_PORT)); - statefulSet.getSpec().getTemplate().getMetadata().getAnnotations() - .put("prometheus.io/path", "/metrics"); + String preStopScript = buildDualMetricDrainScript( + "Waiting for LLAP executors to become idle", + "hadoop_llapdaemon_executornumexecutorsavailable{", "AVAILABLE", + "hadoop_llapdaemon_executornumexecutors{", "TOTAL", + "LLAP executor metrics not found. JMX Exporter may not be configured.", + "All executors idle. 
Shutting down.", + "Executors available=$AVAILABLE / total=$TOTAL \u2014 waiting...", + 10, 6); + applyAutoscalingLifecycle( + statefulSet.getSpec().getTemplate().getSpec(), + statefulSet.getSpec().getTemplate().getMetadata(), + preStopScript, autoscaling.gracePeriodSeconds()); } - if (spec.volumes() != null) { - statefulSet.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); - } - if (spec.volumeMounts() != null) { - statefulSet.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() - .addAll(spec.volumeMounts()); - } - if (llap.extraVolumes() != null) { - statefulSet.getSpec().getTemplate().getSpec().getVolumes().addAll(llap.extraVolumes()); - } - if (llap.extraVolumeMounts() != null) { - statefulSet.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() - .addAll(llap.extraVolumeMounts()); - } + appendUserVolumes(statefulSet.getSpec().getTemplate().getSpec(), + spec.volumes(), spec.volumeMounts(), + llap.extraVolumes(), llap.extraVolumeMounts()); return statefulSet; } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java deleted file mode 100644 index b429335f76e0..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import java.util.Map; - -import io.fabric8.kubernetes.api.model.ConfigMap; -import io.fabric8.kubernetes.api.model.ConfigMapBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; -import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** Manages the metastore-site.xml ConfigMap for the Hive Metastore. 
*/ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=metastore," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class MetastoreConfigMapDependent - extends HiveDependentResource { - - public static final String COMPONENT = "metastore"; - - public MetastoreConfigMapDependent() { - super(ConfigMap.class); - } - - @Override - protected ConfigMap desired(HiveCluster hiveCluster, - Context context) { - Map props = - HiveConfigBuilder.getMetastoreSite(hiveCluster.getSpec()); - - return new ConfigMapBuilder() - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) - .endMetadata() - .addToData("metastore-site.xml", HadoopXmlBuilder.buildXml(props)) - .build(); - } - - /** Returns the ConfigMap resource name for this HiveCluster. */ - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-metastore-config"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java index e1f88caacb63..ce16cc17eeaf 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java @@ -26,8 +26,6 @@ import io.fabric8.kubernetes.api.model.ContainerPort; import io.fabric8.kubernetes.api.model.ContainerPortBuilder; import io.fabric8.kubernetes.api.model.EnvVar; -import io.fabric8.kubernetes.api.model.Lifecycle; -import io.fabric8.kubernetes.api.model.LifecycleBuilder; import io.fabric8.kubernetes.api.model.Probe; import io.fabric8.kubernetes.api.model.Volume; import 
io.fabric8.kubernetes.api.model.VolumeMount; @@ -59,6 +57,12 @@ public MetastoreDeploymentDependent() { super(Deployment.class); } + @Override + protected String getSecondaryResourceName(HiveCluster primary, + Context context) { + return resourceName(primary); + } + @Override protected Deployment desired(HiveCluster hiveCluster, Context context) { @@ -82,9 +86,9 @@ protected Deployment desired(HiveCluster hiveCluster, ConfigUtils.METASTORE_THRIFT_PORT_DEFAULT); List ports = new ArrayList<>(); ports.add(new ContainerPortBuilder() - .withName("thrift").withContainerPort(thriftPort).build()); + .withName("thrift").withContainerPort(thriftPort).withProtocol("TCP").build()); ports.add(new ContainerPortBuilder() - .withName("rest").withContainerPort(9001).build()); + .withName("rest").withContainerPort(9001).withProtocol("TCP").build()); Probe readinessProbe = buildTcpProbe(thriftPort, spec.metastore().readinessProbe(), 15, 10, 3); Probe livenessProbe = buildTcpProbe(thriftPort, spec.metastore().livenessProbe(), 60, 30, 5); @@ -124,18 +128,12 @@ protected Deployment desired(HiveCluster hiveCluster, HadoopXmlBuilder.buildXml(HiveConfigBuilder.getMetastoreSite(spec)), HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); - // When autoscaling is enabled and the Deployment already exists, preserve the current - // replica count (managed by KEDA/HPA). On initial creation, start at minReplicas - // and let KEDA scale up based on load. - boolean autoscalingEnabled = spec.metastore().autoscaling() != null - && spec.metastore().autoscaling().isEnabled(); - Integer replicas = spec.metastore().replicas(); - if (autoscalingEnabled) { - int initialReplicas = Math.max(1, spec.metastore().autoscaling().minReplicas()); - replicas = getSecondaryResource(hiveCluster, context) - .map(d -> d.getSpec().getReplicas()) - .orElse(initialReplicas); - } + // When autoscaling is enabled, preserve current replica count (KEDA/HPA manages it). 
+ AutoscalingSpec msAutoscaling = spec.metastore().autoscaling(); + int initialReplicas = msAutoscaling != null + ? Math.max(1, msAutoscaling.minReplicas()) : spec.metastore().replicas(); + Integer replicas = resolveReplicaCount( + hiveCluster, context, msAutoscaling, spec.metastore().replicas(), initialReplicas); Deployment deployment = new DeploymentBuilder() .withNewMetadata() @@ -178,70 +176,21 @@ protected Deployment desired(HiveCluster hiveCluster, deployment.getSpec().getTemplate().getSpec(), selectorLabels); // Graceful scale-down: poll JMX Exporter (port 9404) for open_connections to drain. - // K8s removes the pod from Service Endpoints on termination, so no new requests arrive. - // Uses flat Prometheus text format — same metric KEDA reads — not brittle JSON parsing. if (autoscaling.isEnabled()) { - String preStopScript = String.join("\n", - "#!/bin/bash", - "echo '[preStop] Waiting for open connections to drain (polling localhost:9404/metrics)...'", - "RETRIES=0", - "while true; do", - " RESPONSE=$(curl -sf http://localhost:9404/metrics)", - " if [ $? -ne 0 ]; then", - " RETRIES=$((RETRIES+1))", - " echo \"[preStop] ERROR: JMX Exporter unreachable on port 9404 (attempt $RETRIES)\"", - " if [ $RETRIES -ge 6 ]; then", - " echo '[preStop] JMX Exporter not responding after 30s. Proceeding with shutdown.'", - " break", - " fi", - " sleep 5; continue", - " fi", - " CONNS=$(echo \"$RESPONSE\" | grep '^hive_metastore_open_connections ' | awk '{print $2}')", - " if [ -z \"$CONNS\" ]; then", - " echo '[preStop] WARNING: hive_metastore_open_connections metric not found. JMX Exporter may not be configured.'", - " break", - " fi", - " if [ \"${CONNS%.*}\" -le 0 ] 2>/dev/null; then", - " echo '[preStop] All connections drained. 
Shutting down.'", - " break", - " fi", - " echo \"[preStop] hive_metastore_open_connections=$CONNS — waiting...\"", - " RETRIES=0", - " sleep 5", - "done"); - Lifecycle lifecycle = new LifecycleBuilder() - .withNewPreStop() - .withNewExec() - .withCommand("/bin/bash", "-c", preStopScript) - .endExec() - .endPreStop() - .build(); - deployment.getSpec().getTemplate().getSpec().getContainers().get(0).setLifecycle(lifecycle); - deployment.getSpec().getTemplate().getSpec() - .setTerminationGracePeriodSeconds((long) autoscaling.gracePeriodSeconds()); - // Prometheus scrape annotations for JMX Exporter metrics endpoint - deployment.getSpec().getTemplate().getMetadata().getAnnotations() - .put("prometheus.io/scrape", "true"); - deployment.getSpec().getTemplate().getMetadata().getAnnotations() - .put("prometheus.io/port", String.valueOf(ConfigUtils.PROMETHEUS_JMX_EXPORTER_PORT)); - deployment.getSpec().getTemplate().getMetadata().getAnnotations() - .put("prometheus.io/path", "/metrics"); + String preStopScript = buildDrainScript( + "Waiting for open connections to drain", + "hive_metastore_open_connections", "CONNS", + "All connections drained. 
Shutting down.", + 5, 6, null); + applyAutoscalingLifecycle( + deployment.getSpec().getTemplate().getSpec(), + deployment.getSpec().getTemplate().getMetadata(), + preStopScript, autoscaling.gracePeriodSeconds()); } - if (spec.volumes() != null) { - deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); - } - if (spec.volumeMounts() != null) { - deployment.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() - .addAll(spec.volumeMounts()); - } - if (spec.metastore().extraVolumes() != null) { - deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.metastore().extraVolumes()); - } - if (spec.metastore().extraVolumeMounts() != null) { - deployment.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() - .addAll(spec.metastore().extraVolumeMounts()); - } + appendUserVolumes(deployment.getSpec().getTemplate().getSpec(), + spec.volumes(), spec.volumeMounts(), + spec.metastore().extraVolumes(), spec.metastore().extraVolumeMounts()); return deployment; } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastorePdbDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastorePdbDependent.java deleted file mode 100644 index e177e1e60138..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastorePdbDependent.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import io.fabric8.kubernetes.api.model.IntOrString; -import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudget; -import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudgetBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.CRUDKubernetesDependentResource; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** - * PodDisruptionBudget for Hive Metastore. - * Ensures at least one Metastore pod remains available during voluntary disruptions - * to prevent catalog access failures. 
- */ -public class MetastorePdbDependent - extends CRUDKubernetesDependentResource { - - public MetastorePdbDependent() { - super(PodDisruptionBudget.class); - } - - @Override - protected PodDisruptionBudget desired(HiveCluster hiveCluster, - Context context) { - return new PodDisruptionBudgetBuilder() - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, "metastore")) - .endMetadata() - .withNewSpec() - .withMinAvailable(new IntOrString(1)) - .withNewSelector() - .withMatchLabels(Labels.selectorForComponent(hiveCluster, "metastore")) - .endSelector() - .endSpec() - .build(); - } - - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-metastore-pdb"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreScaledObjectDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreScaledObjectDependent.java deleted file mode 100644 index 58263318468f..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreScaledObjectDependent.java +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import io.fabric8.kubernetes.api.model.GenericKubernetesResource; -import io.fabric8.kubernetes.api.model.GenericKubernetesResourceBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.processing.GroupVersionKind; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; -import org.apache.hive.kubernetes.operator.util.ConfigUtils; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** - * Manages a KEDA ScaledObject for Hive Metastore autoscaling. - *

- * Scale-up triggers (OR): - * - Open connections exceed threshold (Prometheus) - * - Pod CPU > 75% - *

- * Scale-down triggers (AND): - * - CPU < activationValue - * - Open connections at 0 - *

- * Cooldown: configurable (default 300s / 5 minutes) - * Guardrail: replicas should be set based on backend DB max_connections. - */ -public class MetastoreScaledObjectDependent extends HiveGenericDependentResource { - - public MetastoreScaledObjectDependent() { - super(new GroupVersionKind("keda.sh", "v1alpha1", "ScaledObject")); - } - - @Override - protected GenericKubernetesResource desired(HiveCluster hiveCluster, - Context context) { - AutoscalingSpec autoscaling = hiveCluster.getSpec().metastore().autoscaling(); - int maxReplicas = hiveCluster.getSpec().metastore().replicas(); - String targetName = hiveCluster.getMetadata().getName() + "-metastore"; - - // Threshold = max threads per pod (from metastore-site config or default 1000). - // KEDA divides total open_connections by threshold to determine desired replicas. - int maxThreads = ConfigUtils.getInt( - hiveCluster.getSpec().metastore().configOverrides(), - ConfigUtils.METASTORE_SERVER_MAX_THREADS_KEY, - ConfigUtils.METASTORE_SERVER_MAX_THREADS_HIVE_KEY, - ConfigUtils.METASTORE_SERVER_MAX_THREADS_DEFAULT); - - Map spec = new HashMap<>(); - spec.put("scaleTargetRef", Map.of( - "apiVersion", "apps/v1", - "kind", "Deployment", - "name", targetName - )); - spec.put("minReplicaCount", autoscaling.minReplicas()); - spec.put("maxReplicaCount", maxReplicas); - spec.put("cooldownPeriod", autoscaling.cooldownSeconds()); - spec.put("pollingInterval", 30); - - spec.put("advanced", Map.of( - "horizontalPodAutoscalerConfig", Map.of( - "behavior", Map.of( - "scaleDown", Map.of( - "stabilizationWindowSeconds", autoscaling.cooldownSeconds(), - "policies", List.of(Map.of( - "type", "Pods", - "value", 1, - "periodSeconds", 60 - )) - ), - "scaleUp", Map.of( - "stabilizationWindowSeconds", 120, - "policies", List.of(Map.of( - "type", "Percent", - "value", 50, - "periodSeconds", 60 - )) - ) - ) - ) - )); - - // Triggers: Prometheus for open connections + CPU (only when CPU requests are defined) - // "or vector(0)" ensures the 
query returns 0 (not empty) when no pods match. - List> triggers = new ArrayList<>(); - triggers.add(Map.of( - "type", "prometheus", - "metadata", Map.of( - "serverAddress", "http://prometheus-server.monitoring.svc.cluster.local", - "metricName", "hive_metastore_open_connections", - "query", String.format( - "sum(hive_metastore_open_connections{namespace=\"%s\",pod=~\"%s-.*\"}) or vector(0)", - hiveCluster.getMetadata().getNamespace(), targetName), - "threshold", String.valueOf(maxThreads), - "activationThreshold", "0" - ) - )); - if (hiveCluster.getSpec().metastore().resources() != null) { - // activationValue prevents idle JVM CPU from keeping the ScaledObject active. - triggers.add(Map.of( - "type", "cpu", - "metricType", "Utilization", - "metadata", Map.of( - "value", "75", - "activationValue", String.valueOf(autoscaling.scaleDownThreshold()) - ) - )); - } - spec.put("triggers", triggers); - - return new GenericKubernetesResourceBuilder() - .withApiVersion("keda.sh/v1alpha1") - .withKind("ScaledObject") - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, "metastore")) - .endMetadata() - .withAdditionalProperties(Map.of("spec", spec)) - .build(); - } - - @Override - protected String getResourceName(HiveCluster hiveCluster) { - return resourceName(hiveCluster); - } - - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-metastore-scaledobject"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreServiceDependent.java deleted file mode 100644 index 2620a24e01d7..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreServiceDependent.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * 
Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import io.fabric8.kubernetes.api.model.IntOrString; -import io.fabric8.kubernetes.api.model.Service; -import io.fabric8.kubernetes.api.model.ServiceBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.util.ConfigUtils; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** Manages the Kubernetes Service for the Hive Metastore (Thrift + REST ports). 
*/ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=metastore," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class MetastoreServiceDependent - extends HiveDependentResource { - - public MetastoreServiceDependent() { - super(Service.class); - } - - @Override - protected Service desired(HiveCluster hiveCluster, - Context context) { - int thriftPort = ConfigUtils.getInt( - hiveCluster.getSpec().metastore().configOverrides(), - ConfigUtils.METASTORE_THRIFT_PORT_KEY, - ConfigUtils.METASTORE_THRIFT_PORT_HIVE_KEY, - ConfigUtils.METASTORE_THRIFT_PORT_DEFAULT); - return new ServiceBuilder() - .withNewMetadata() - .withName(hiveCluster.getMetadata().getName() + "-metastore") - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, - MetastoreDeploymentDependent.COMPONENT)) - .endMetadata() - .withNewSpec() - .withType("ClusterIP") - .withSelector(Labels.selectorForComponent(hiveCluster, - MetastoreDeploymentDependent.COMPONENT)) - .addNewPort() - .withName("thrift") - .withPort(thriftPort) - .withTargetPort(new IntOrString(thriftPort)) - .endPort() - .addNewPort() - .withName("rest") - .withPort(9001) - .withTargetPort(new IntOrString(9001)) - .endPort() - .endSpec() - .build(); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java index a23c0c477436..25d0eb39a0f9 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java @@ -53,6 +53,12 @@ public SchemaInitJobDependent() { super(Job.class); } + @Override + protected String getSecondaryResourceName(HiveCluster primary, + Context context) { + return 
resourceName(primary); + } + @Override protected Job desired(HiveCluster hiveCluster, Context context) { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/ScratchPvcDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/ScratchPvcDependent.java index 6a645f043574..230ba47edd13 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/ScratchPvcDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/ScratchPvcDependent.java @@ -55,6 +55,12 @@ public ScratchPvcDependent() { super(PersistentVolumeClaim.class); } + @Override + protected String getSecondaryResourceName(HiveCluster primary, + Context context) { + return resourceName(primary); + } + @Override protected PersistentVolumeClaim desired(HiveCluster hiveCluster, Context context) { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmPdbDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmPdbDependent.java deleted file mode 100644 index 13fc6343cad0..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmPdbDependent.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import io.fabric8.kubernetes.api.model.IntOrString; -import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudget; -import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudgetBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.CRUDKubernetesDependentResource; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** - * PodDisruptionBudget for Tez Application Masters. - * Ensures at least one Tez AM remains available in the warm pool during voluntary disruptions. 
- */ -public class TezAmPdbDependent - extends CRUDKubernetesDependentResource { - - public TezAmPdbDependent() { - super(PodDisruptionBudget.class); - } - - @Override - protected PodDisruptionBudget desired(HiveCluster hiveCluster, - Context context) { - return new PodDisruptionBudgetBuilder() - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, "tezam")) - .endMetadata() - .withNewSpec() - .withMinAvailable(new IntOrString(1)) - .withNewSelector() - .withMatchLabels(Labels.selectorForComponent(hiveCluster, "tezam")) - .endSelector() - .endSpec() - .build(); - } - - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-tezam-pdb"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmScaledObjectDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmScaledObjectDependent.java deleted file mode 100644 index 731eb6f08d97..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmScaledObjectDependent.java +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import io.fabric8.kubernetes.api.model.GenericKubernetesResource; -import io.fabric8.kubernetes.api.model.GenericKubernetesResourceBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.processing.GroupVersionKind; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** - * Manages a KEDA ScaledObject for Tez Application Master autoscaling. - *

- * Tez AMs run in a warm pool (StatefulSet). An unclaimed AM sits idle; - * a claimed AM actively orchestrates a query DAG and consumes CPU. - *

- * Scale-up trigger: - * - Pod CPU > 60% across the StatefulSet (most AMs claimed and working) - *

- * Scale-down trigger: - * - Pod CPU < 10% (many idle unclaimed AMs) - *

- * Cooldown: configurable (default 600s / 10 minutes) - */ -public class TezAmScaledObjectDependent extends HiveGenericDependentResource { - - public TezAmScaledObjectDependent() { - super(new GroupVersionKind("keda.sh", "v1alpha1", "ScaledObject")); - } - - @Override - protected GenericKubernetesResource desired(HiveCluster hiveCluster, - Context context) { - AutoscalingSpec autoscaling = hiveCluster.getSpec().tezAm().autoscaling(); - int maxReplicas = hiveCluster.getSpec().tezAm().replicas(); - String targetName = hiveCluster.getMetadata().getName() + "-tezam"; - - Map spec = new HashMap<>(); - spec.put("scaleTargetRef", Map.of( - "apiVersion", "apps/v1", - "kind", "StatefulSet", - "name", targetName - )); - // KEDA requires idleReplicaCount < minReplicaCount. - // For scale-to-zero: min=1 (minimum when active), idle=0 (scale to zero when idle). - // For non-zero min: just set minReplicaCount (no idle needed). - int minReplicaCount = Math.max(1, autoscaling.minReplicas()); - spec.put("minReplicaCount", minReplicaCount); - spec.put("maxReplicaCount", maxReplicas); - if (autoscaling.minReplicas() == 0) { - spec.put("idleReplicaCount", 0); - } - spec.put("cooldownPeriod", autoscaling.cooldownSeconds()); - spec.put("pollingInterval", 5); - - spec.put("advanced", Map.of( - "horizontalPodAutoscalerConfig", Map.of( - "behavior", Map.of( - "scaleDown", Map.of( - "stabilizationWindowSeconds", autoscaling.cooldownSeconds(), - "policies", List.of(Map.of( - "type", "Pods", - "value", 1, - "periodSeconds", 60 - )) - ), - "scaleUp", Map.of( - "stabilizationWindowSeconds", 60, - "policies", List.of(Map.of( - "type", "Pods", - "value", 2, - "periodSeconds", 30 - )) - ) - ) - ) - )); - - // Triggers: - // 1. CPU utilization — the primary proportional scaler for warm-pool Tez AMs - // (only included when container has CPU requests defined, required by KEDA) - // 2. 
HS2 cross-component activation: when HS2 has open sessions, - // TezAM should be available (enables wake-from-zero) - // - // When CPU IS available: CPU drives proportional scaling, HS2 trigger is activation-only - // (threshold set to maxReplicas so it never dominates the HPA calculation). - // When CPU is NOT available: tez_session_pending_tasks drives proportional scaling - // (real query demand — tasks waiting for AM slots), with HS2 sessions for activation only. - String hs2TargetName = hiveCluster.getMetadata().getName() + "-hiveserver2"; - String namespace = hiveCluster.getMetadata().getNamespace(); - List> triggers = new ArrayList<>(); - if (hiveCluster.getSpec().tezAm().resources() != null) { - // CPU drives proportional scaling; activationValue prevents idle JVM CPU - // from keeping the ScaledObject permanently "active" (blocks scale-to-zero). - triggers.add(Map.of( - "type", "cpu", - "metricType", "Utilization", - "metadata", Map.of( - "value", String.valueOf(autoscaling.scaleUpThreshold()), - "activationValue", String.valueOf(autoscaling.scaleDownThreshold()) - ) - )); - // Activation-only: (sessions > bool 0) returns 0 or 1, with threshold=maxReplicas - // ensures desired = ceil(1/max) = 1 — never drives replica count above min. - // activationThreshold=0 ensures any open session wakes TezAM from zero. - // Uses hs2_open_sessions (connection-level) not hs2_active_sessions (query-level). - // "or vector(0)" ensures the query returns 0 (not empty) when HS2 has no pods. 
- triggers.add(Map.of( - "type", "prometheus", - "metadata", Map.of( - "serverAddress", "http://prometheus-server.monitoring.svc.cluster.local", - "metricName", "hs2_open_sessions_activation", - "query", String.format( - "(max(hs2_open_sessions{namespace=\"%s\",pod=~\"%s-.*\"}) > bool 0) or vector(0)", - namespace, hs2TargetName), - "threshold", String.valueOf(maxReplicas), - "activationThreshold", "0" - ) - )); - } else { - // No CPU available: use tez_session_pending_tasks for proportional scaling. - // This metric reflects real query demand (tasks waiting for AM slots), unlike - // hs2_open_sessions which includes zombie/idle sessions from ungracefully closed clients. - // Threshold: scaleUpThreshold interpreted as pending-tasks-per-AM (default 60 when - // using CPU mode, but for pending tasks a lower value like 5-10 is recommended). - // "or vector(0)" ensures the query returns 0 when HS2 has no pods. - triggers.add(Map.of( - "type", "prometheus", - "metadata", Map.of( - "serverAddress", "http://prometheus-server.monitoring.svc.cluster.local", - "metricName", "tez_session_pending_tasks", - "query", String.format( - "sum(tez_session_pending_tasks{namespace=\"%s\",pod=~\"%s-.*\"}) or vector(0)", - namespace, hs2TargetName), - "threshold", String.valueOf(autoscaling.scaleUpThreshold()), - "activationThreshold", "0" - ) - )); - // Activation-only: (sessions > bool 0) returns 0 or 1, with threshold=maxReplicas - // ensures desired = ceil(1/max) = 1 — never drives replica count above min. - // activationThreshold=0 ensures any open session wakes TezAM from zero. 
- triggers.add(Map.of( - "type", "prometheus", - "metadata", Map.of( - "serverAddress", "http://prometheus-server.monitoring.svc.cluster.local", - "metricName", "hs2_open_sessions_activation", - "query", String.format( - "(max(hs2_open_sessions{namespace=\"%s\",pod=~\"%s-.*\"}) > bool 0) or vector(0)", - namespace, hs2TargetName), - "threshold", String.valueOf(maxReplicas), - "activationThreshold", "0" - ) - )); - } - spec.put("triggers", triggers); - - return new GenericKubernetesResourceBuilder() - .withApiVersion("keda.sh/v1alpha1") - .withKind("ScaledObject") - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, "tezam")) - .endMetadata() - .withAdditionalProperties(Map.of("spec", spec)) - .build(); - } - - @Override - protected String getResourceName(HiveCluster hiveCluster) { - return resourceName(hiveCluster); - } - - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-tezam-scaledobject"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmServiceDependent.java deleted file mode 100644 index 781685286038..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmServiceDependent.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import io.fabric8.kubernetes.api.model.Service; -import io.fabric8.kubernetes.api.model.ServiceBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** - * Manages the headless Kubernetes Service for Tez Application Master. - * Required by the StatefulSet for stable DNS entries so that - * HiveServer2 can resolve TezAM pod hostnames for RPC communication. 
- */ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=tezam," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class TezAmServiceDependent - extends HiveDependentResource { - - public TezAmServiceDependent() { - super(Service.class); - } - - @Override - protected Service desired(HiveCluster hiveCluster, - Context context) { - return new ServiceBuilder() - .withNewMetadata() - .withName(hiveCluster.getMetadata().getName() + "-tezam") - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, - TezAmStatefulSetDependent.COMPONENT)) - .endMetadata() - .withNewSpec() - .withClusterIP("None") - .withSelector(Labels.selectorForComponent(hiveCluster, - TezAmStatefulSetDependent.COMPONENT)) - .endSpec() - .build(); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java index 56e60ca10403..01a0509f99ca 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java @@ -24,10 +24,7 @@ import io.fabric8.kubernetes.api.model.Container; import io.fabric8.kubernetes.api.model.ContainerPort; -import io.fabric8.kubernetes.api.model.ContainerPortBuilder; import io.fabric8.kubernetes.api.model.EnvVar; -import io.fabric8.kubernetes.api.model.Lifecycle; -import io.fabric8.kubernetes.api.model.LifecycleBuilder; import io.fabric8.kubernetes.api.model.apps.StatefulSet; import io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder; import io.javaoperatorsdk.operator.api.reconciler.Context; @@ -62,6 +59,12 @@ public TezAmStatefulSetDependent() { super(StatefulSet.class); } + @Override + protected String 
getSecondaryResourceName(HiveCluster primary, + Context context) { + return resourceName(primary); + } + @Override protected StatefulSet desired(HiveCluster hiveCluster, Context context) { @@ -103,8 +106,8 @@ protected StatefulSet desired(HiveCluster hiveCluster, List volumes = new ArrayList<>(); volumes.add(buildProjectedConfigVolume("hive-config", - HiveServer2ConfigMapDependent.resourceName(hiveCluster), - HadoopConfigMapDependent.resourceName(hiveCluster))); + HiveConfigMapDependent.HiveServer2.resourceName(hiveCluster), + HiveConfigMapDependent.Hadoop.resourceName(hiveCluster))); volumes.add(new io.fabric8.kubernetes.api.model.VolumeBuilder() .withName("scratch") .withNewPersistentVolumeClaim() @@ -133,18 +136,12 @@ protected StatefulSet desired(HiveCluster hiveCluster, HadoopXmlBuilder.buildXml(HiveConfigBuilder.getTezSite(spec)), HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); - // When autoscaling is enabled and the StatefulSet already exists, preserve the current - // replica count (managed by KEDA/HPA). On initial creation: - // - minReplicas == 0: start at 0, KEDA scales up when hs2_active_sessions > 0 - // - minReplicas > 0: start at configured replicas - boolean autoscalingEnabled = tezAm.autoscaling() != null && tezAm.autoscaling().isEnabled(); - Integer replicas = tezAm.replicas(); - if (autoscalingEnabled) { - int initialReplicas = tezAm.autoscaling().minReplicas() == 0 ? 0 : tezAm.replicas(); - replicas = getSecondaryResource(hiveCluster, context) - .map(s -> s.getSpec().getReplicas()) - .orElse(initialReplicas); - } + // When autoscaling is enabled, preserve current replica count (KEDA/HPA manages it). + AutoscalingSpec tezAmAutoscaling = tezAm.autoscaling(); + int initialReplicas = tezAmAutoscaling != null && tezAmAutoscaling.minReplicas() == 0 + ? 
0 : tezAm.replicas(); + Integer replicas = resolveReplicaCount( + hiveCluster, context, tezAmAutoscaling, tezAm.replicas(), initialReplicas); StatefulSet statefulSet = new StatefulSetBuilder() .withNewMetadata() @@ -185,63 +182,21 @@ protected StatefulSet desired(HiveCluster hiveCluster, statefulSet.getSpec().getTemplate().getSpec(), selectorLabels); // Graceful scale-down: poll JMX Exporter (port 9404) for DAGsRunning to reach 0. - // K8s removes the pod from Service Endpoints, so HS2 won't assign new DAGs to this AM. - // We read from the same Prometheus-format endpoint that KEDA uses — flat text, not brittle JSON. if (autoscaling.isEnabled()) { - String preStopScript = String.join("\n", - "#!/bin/bash", - "echo '[preStop] Waiting for active DAGs to complete (polling localhost:9404/metrics)...'", - "RETRIES=0", - "while true; do", - " RESPONSE=$(curl -sf http://localhost:9404/metrics)", - " if [ $? -ne 0 ]; then", - " RETRIES=$((RETRIES+1))", - " echo \"[preStop] ERROR: JMX Exporter unreachable on port 9404 (attempt $RETRIES)\"", - " if [ $RETRIES -ge 6 ]; then", - " echo '[preStop] JMX Exporter not responding after 60s. Proceeding with shutdown.'", - " break", - " fi", - " sleep 10; continue", - " fi", - " DAGS=$(echo \"$RESPONSE\" | grep '^tez_am_dagsrunning ' | awk '{print $2}')", - " if [ -z \"$DAGS\" ]; then", - " echo '[preStop] WARNING: tez_am_dagsrunning metric not found. JMX Exporter may not be configured.'", - " break", - " fi", - " if [ \"${DAGS%.*}\" -le 0 ] 2>/dev/null; then", - " echo '[preStop] No active DAGs. 
Safe to terminate Tez AM.'", - " break", - " fi", - " echo \"[preStop] tez_am_dagsrunning=$DAGS — waiting...\"", - " RETRIES=0", - " sleep 10", - "done"); - Lifecycle lifecycle = new LifecycleBuilder() - .withNewPreStop() - .withNewExec() - .withCommand("/bin/bash", "-c", preStopScript) - .endExec() - .endPreStop() - .build(); - statefulSet.getSpec().getTemplate().getSpec().getContainers().get(0).setLifecycle(lifecycle); - statefulSet.getSpec().getTemplate().getSpec() - .setTerminationGracePeriodSeconds((long) autoscaling.gracePeriodSeconds()); + String preStopScript = buildDrainScript( + "Waiting for active DAGs to complete", + "tez_am_dagsrunning", "DAGS", + "No active DAGs. Safe to terminate Tez AM.", + 10, 6, null); + applyAutoscalingLifecycle( + statefulSet.getSpec().getTemplate().getSpec(), + statefulSet.getSpec().getTemplate().getMetadata(), + preStopScript, autoscaling.gracePeriodSeconds()); } - if (spec.volumes() != null) { - statefulSet.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); - } - if (spec.volumeMounts() != null) { - statefulSet.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() - .addAll(spec.volumeMounts()); - } - if (tezAm.extraVolumes() != null) { - statefulSet.getSpec().getTemplate().getSpec().getVolumes().addAll(tezAm.extraVolumes()); - } - if (tezAm.extraVolumeMounts() != null) { - statefulSet.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() - .addAll(tezAm.extraVolumeMounts()); - } + appendUserVolumes(statefulSet.getSpec().getTemplate().getSpec(), + spec.volumes(), spec.volumeMounts(), + tezAm.extraVolumes(), tezAm.extraVolumeMounts()); return statefulSet; } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2AutoscalingCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2AutoscalingCondition.java deleted file mode 100644 index 
bf14dac91e54..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2AutoscalingCondition.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent.condition; - -import io.fabric8.kubernetes.api.model.HasMetadata; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; - -/** - * Activation condition for HiveServer2 autoscaling dependent resources. - * Returns true only when spec.hiveServer2.autoscaling.enabled is true. 
- */ -public class HiveServer2AutoscalingCondition - implements Condition { - - @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - return primary.getSpec().hiveServer2().autoscaling().isEnabled(); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2MetricScalingCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2MetricScalingCondition.java deleted file mode 100644 index 9c01942e4a1c..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2MetricScalingCondition.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.kubernetes.operator.dependent.condition; - -import io.fabric8.kubernetes.api.model.HasMetadata; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; - -/** - * Activation condition for HiveServer2 Prometheus-based ScaledObject. - * Returns true when autoscaling is enabled AND minReplicas > 0. - * When minReplicas == 0, the HTTPScaledObject is used instead (scale-to-zero). - */ -public class HiveServer2MetricScalingCondition - implements Condition { - - @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - AutoscalingSpec autoscaling = primary.getSpec().hiveServer2().autoscaling(); - return autoscaling.isEnabled() && autoscaling.minReplicas() > 0; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2Precondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2Precondition.java deleted file mode 100644 index 81f07269e9c9..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2Precondition.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent.condition; - -import io.fabric8.kubernetes.api.model.apps.Deployment; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; - -/** - * Precondition for HiveServer2 Deployment. - * If Metastore is external, proceed immediately. - * If managed, wait for Metastore pods to be ready. - */ -public class HiveServer2Precondition implements Condition { - - @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - - if (!primary.getSpec().metastore().isEnabled()) { - return true; - } - - // When autoscaling is enabled, wait for minReplicas (KEDA manages scaling beyond that). - // Without autoscaling, wait for all configured replicas. 
- int desiredReplicas; - if (primary.getSpec().metastore().autoscaling().isEnabled()) { - desiredReplicas = Math.max(1, primary.getSpec().metastore().autoscaling().minReplicas()); - } else { - desiredReplicas = primary.getSpec().metastore().replicas(); - } - return context.getSecondaryResources(Deployment.class).stream() - .filter(d -> d.getMetadata().getName().equals(primary.getMetadata().getName() + "-metastore")) - .findFirst() - .map(deployment -> deployment.getStatus() != null - && deployment.getStatus().getReadyReplicas() != null - && deployment.getStatus().getReadyReplicas() >= desiredReplicas) - .orElse(false); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2ScaleToZeroCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2ScaleToZeroCondition.java deleted file mode 100644 index 7ae91b8f7b8f..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2ScaleToZeroCondition.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.kubernetes.operator.dependent.condition; - -import io.fabric8.kubernetes.api.model.HasMetadata; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; - -/** - * Activation condition for HiveServer2 scale-to-zero (HTTPScaledObject). - * Returns true when autoscaling is enabled AND minReplicas == 0. - * Requires the KEDA HTTP Add-on to be installed in the cluster. - */ -public class HiveServer2ScaleToZeroCondition - implements Condition { - - @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - AutoscalingSpec autoscaling = primary.getSpec().hiveServer2().autoscaling(); - return autoscaling.isEnabled() && autoscaling.minReplicas() == 0; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapAutoscalingCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapAutoscalingCondition.java deleted file mode 100644 index f4e097786b08..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapAutoscalingCondition.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent.condition; - -import io.fabric8.kubernetes.api.model.HasMetadata; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; - -/** - * Activation condition for LLAP autoscaling dependent resources. - * Returns true only when spec.llap.enabled is true and spec.llap.autoscaling.enabled is true. - */ -public class LlapAutoscalingCondition - implements Condition { - - @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - return primary.getSpec().llap().isEnabled() - && primary.getSpec().llap().autoscaling().isEnabled(); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapEnabledCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapEnabledCondition.java deleted file mode 100644 index a113c50efbff..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapEnabledCondition.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent.condition; - -import io.fabric8.kubernetes.api.model.HasMetadata; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; - -/** - * Activation condition for LLAP dependent resources. - * Returns true only when spec.llap.enabled is true. - */ -public class LlapEnabledCondition - implements Condition { - - @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - return primary.getSpec().llap().isEnabled(); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreAutoscalingCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreAutoscalingCondition.java deleted file mode 100644 index a0ac83d8a423..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreAutoscalingCondition.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent.condition; - -import io.fabric8.kubernetes.api.model.HasMetadata; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; - -/** - * Activation condition for Metastore autoscaling dependent resources. - * Returns true only when spec.metastore.autoscaling.enabled is true and metastore is managed. 
- */ -public class MetastoreAutoscalingCondition - implements Condition { - - @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - return primary.getSpec().metastore().isEnabled() - && primary.getSpec().metastore().autoscaling().isEnabled(); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreEnabledCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreEnabledCondition.java deleted file mode 100644 index b1cb4139ac96..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreEnabledCondition.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.kubernetes.operator.dependent.condition; - -import io.fabric8.kubernetes.api.model.HasMetadata; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; - -/** - * Activation condition for Metastore dependent resources. - * Returns true only when spec.metastore.enabled is true. - */ -public class MetastoreEnabledCondition implements Condition { - @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - return primary.getSpec().metastore().isEnabled(); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java deleted file mode 100644 index 58885c6e8865..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent.condition; - -import io.fabric8.kubernetes.api.model.apps.Deployment; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; - -/** - * Ready condition that checks whether the Metastore Deployment has the - * desired number of ready replicas. Used to gate HiveServer2 Deployment. - */ -public class MetastoreReadyCondition - implements Condition { - - @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - if (!primary.getSpec().metastore().isEnabled()) { - return true; - } - // When autoscaling is enabled, wait for minReplicas (KEDA manages scaling beyond that). - // Without autoscaling, wait for all configured replicas. 
- int desiredReplicas; - if (primary.getSpec().metastore().autoscaling().isEnabled()) { - desiredReplicas = Math.max(1, primary.getSpec().metastore().autoscaling().minReplicas()); - } else { - desiredReplicas = primary.getSpec().metastore().replicas(); - } - return dependentResource.getSecondaryResource(primary, context) - .map(deployment -> deployment.getStatus() != null - && deployment.getStatus().getReadyReplicas() != null - && deployment.getStatus().getReadyReplicas() >= desiredReplicas) - .orElse(false); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/SchemaJobCompletedCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/SchemaJobCompletedCondition.java deleted file mode 100644 index 1b0b44318596..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/SchemaJobCompletedCondition.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.kubernetes.operator.dependent.condition; - -import io.fabric8.kubernetes.api.model.batch.v1.Job; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; - -/** - * Ready condition that checks whether the schema initialization Job - * has completed successfully. Used to gate Metastore Deployment creation. - */ -public class SchemaJobCompletedCondition - implements Condition { - - @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - if (!primary.getSpec().metastore().isEnabled()) { - return true; - } - return dependentResource.getSecondaryResource(primary, context) - .map(job -> job.getStatus() != null - && job.getStatus().getSucceeded() != null - && job.getStatus().getSucceeded() >= 1) - .orElse(false); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmAutoscalingCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmAutoscalingCondition.java deleted file mode 100644 index a2ed23cbbadc..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmAutoscalingCondition.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent.condition; - -import io.fabric8.kubernetes.api.model.HasMetadata; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; - -/** - * Activation condition for Tez AM autoscaling dependent resources. - * Returns true only when spec.tezAm.enabled is true and spec.tezAm.autoscaling.enabled is true. - */ -public class TezAmAutoscalingCondition - implements Condition { - - @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - return primary.getSpec().tezAm().isEnabled() - && primary.getSpec().tezAm().autoscaling().isEnabled(); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmEnabledCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmEnabledCondition.java deleted file mode 100644 index 85ae7e45dbdb..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmEnabledCondition.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent.condition; - -import io.fabric8.kubernetes.api.model.HasMetadata; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; - -/** - * Activation condition for Tez AM dependent resources. - * Returns true only when spec.tezAm.enabled is true. 
- */ -public class TezAmEnabledCondition - implements Condition { - - @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - return primary.getSpec().tezAm().isEnabled(); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java index 40dd8a771203..1897582bd18e 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java @@ -78,6 +78,14 @@ public record HiveClusterSpec( public HiveClusterSpec { Objects.requireNonNull(zookeeper, "zookeeper must be provided in the HiveCluster spec"); + metastore = metastore != null ? metastore : new MetastoreSpec( + 1, null, null, null, null, null, null, true, null, null, null, null); + hiveServer2 = hiveServer2 != null ? hiveServer2 : new HiveServer2Spec( + 1, null, null, null, null, null, null, null, null, null); + llap = llap != null ? llap : new LlapSpec( + 1, null, null, null, null, true, null, null, null, null, null); + tezAm = tezAm != null ? tezAm : new TezAmSpec( + 1, null, null, null, null, true, null, null, null); envVars = envVars != null ? envVars : List.of(); externalJars = externalJars != null ? externalJars : List.of(); volumes = volumes != null ? 
volumes : List.of(); diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java index c973145b0080..6a0008b9147d 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java @@ -63,7 +63,7 @@ public record TezAmSpec( extraVolumes = extraVolumes != null ? extraVolumes : List.of(); extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); autoscaling = autoscaling != null ? autoscaling : new AutoscalingSpec( - false, 0, 60, 10, 600, 120); + false, 0, 5, 10, 600, 120); } public boolean isEnabled() { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java index e621f7065a54..71453ef0335b 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java @@ -35,43 +35,6 @@ import io.javaoperatorsdk.operator.api.reconciler.ErrorStatusUpdateControl; import io.javaoperatorsdk.operator.api.reconciler.Reconciler; import io.javaoperatorsdk.operator.api.reconciler.UpdateControl; -import io.javaoperatorsdk.operator.api.reconciler.Workflow; -import io.javaoperatorsdk.operator.api.reconciler.dependent.Dependent; -import org.apache.hive.kubernetes.operator.dependent.HadoopConfigMapDependent; -import org.apache.hive.kubernetes.operator.dependent.HiveServer2ConfigMapDependent; -import org.apache.hive.kubernetes.operator.dependent.HiveServer2DeploymentDependent; -import 
org.apache.hive.kubernetes.operator.dependent.HiveServer2PdbDependent; -import org.apache.hive.kubernetes.operator.dependent.HiveServer2HttpScaledObjectDependent; -import org.apache.hive.kubernetes.operator.dependent.HiveServer2ScaledObjectDependent; -import org.apache.hive.kubernetes.operator.dependent.HiveServer2ServiceDependent; -import org.apache.hive.kubernetes.operator.dependent.LlapConfigMapDependent; -import org.apache.hive.kubernetes.operator.dependent.LlapPdbDependent; -import org.apache.hive.kubernetes.operator.dependent.LlapScaledObjectDependent; -import org.apache.hive.kubernetes.operator.dependent.LlapServiceDependent; -import org.apache.hive.kubernetes.operator.dependent.LlapStatefulSetDependent; -import org.apache.hive.kubernetes.operator.dependent.MetastoreConfigMapDependent; -import org.apache.hive.kubernetes.operator.dependent.MetastoreDeploymentDependent; -import org.apache.hive.kubernetes.operator.dependent.MetastorePdbDependent; -import org.apache.hive.kubernetes.operator.dependent.MetastoreScaledObjectDependent; -import org.apache.hive.kubernetes.operator.dependent.MetastoreServiceDependent; -import org.apache.hive.kubernetes.operator.dependent.SchemaInitJobDependent; -import org.apache.hive.kubernetes.operator.dependent.ScratchPvcDependent; -import org.apache.hive.kubernetes.operator.dependent.TezAmPdbDependent; -import org.apache.hive.kubernetes.operator.dependent.TezAmScaledObjectDependent; -import org.apache.hive.kubernetes.operator.dependent.TezAmServiceDependent; -import org.apache.hive.kubernetes.operator.dependent.TezAmStatefulSetDependent; -import org.apache.hive.kubernetes.operator.dependent.condition.HiveServer2AutoscalingCondition; -import org.apache.hive.kubernetes.operator.dependent.condition.HiveServer2MetricScalingCondition; -import org.apache.hive.kubernetes.operator.dependent.condition.HiveServer2Precondition; -import org.apache.hive.kubernetes.operator.dependent.condition.HiveServer2ScaleToZeroCondition; -import 
org.apache.hive.kubernetes.operator.dependent.condition.LlapAutoscalingCondition; -import org.apache.hive.kubernetes.operator.dependent.condition.LlapEnabledCondition; -import org.apache.hive.kubernetes.operator.dependent.condition.MetastoreAutoscalingCondition; -import org.apache.hive.kubernetes.operator.dependent.condition.MetastoreEnabledCondition; -import org.apache.hive.kubernetes.operator.dependent.condition.MetastoreReadyCondition; -import org.apache.hive.kubernetes.operator.dependent.condition.SchemaJobCompletedCondition; -import org.apache.hive.kubernetes.operator.dependent.condition.TezAmAutoscalingCondition; -import org.apache.hive.kubernetes.operator.dependent.condition.TezAmEnabledCondition; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.model.HiveClusterStatus; import org.apache.hive.kubernetes.operator.model.status.ComponentStatus; @@ -83,74 +46,21 @@ * Orchestrates all dependent resources with proper dependency ordering. 
*/ @ControllerConfiguration -@Workflow(dependents = { - // --- ConfigMap dependents --- - @Dependent(name = "hadoop-configmap", type = HadoopConfigMapDependent.class), - @Dependent(name = "metastore-configmap", type = MetastoreConfigMapDependent.class, - activationCondition = MetastoreEnabledCondition.class), - @Dependent(name = "hiveserver2-configmap", type = HiveServer2ConfigMapDependent.class), - // --- Job dependents --- - @Dependent(name = "schema-init-job", type = SchemaInitJobDependent.class, dependsOn = {"metastore-configmap", - "hadoop-configmap"}, readyPostcondition = SchemaJobCompletedCondition.class, - activationCondition = MetastoreEnabledCondition.class), - // --- Deployment dependents --- - @Dependent(name = "metastore-deployment", type = MetastoreDeploymentDependent.class, dependsOn = { - "schema-init-job"}, readyPostcondition = MetastoreReadyCondition.class, - activationCondition = MetastoreEnabledCondition.class), - // --- Service dependents --- - @Dependent(name = "metastore-service", type = MetastoreServiceDependent.class, dependsOn = { - "metastore-configmap"}, activationCondition = MetastoreEnabledCondition.class), - @Dependent(name = "hiveserver2-deployment", type = HiveServer2DeploymentDependent.class, dependsOn = { - "hiveserver2-configmap", "hadoop-configmap"}, reconcilePrecondition = HiveServer2Precondition.class), - @Dependent(name = "hiveserver2-service", type = HiveServer2ServiceDependent.class, dependsOn = { - "hiveserver2-configmap"}), - // --- LLAP (conditional) --- - @Dependent(name = "llap-configmap", type = LlapConfigMapDependent.class, - activationCondition = LlapEnabledCondition.class), - @Dependent(name = "llap-statefulset", type = LlapStatefulSetDependent.class, dependsOn = {"llap-configmap", - "hadoop-configmap"}, activationCondition = LlapEnabledCondition.class), - @Dependent(name = "llap-service", type = LlapServiceDependent.class, - activationCondition = LlapEnabledCondition.class), - // --- TezAM (conditional) --- - 
@Dependent(name = "scratch-pvc", type = ScratchPvcDependent.class, - activationCondition = TezAmEnabledCondition.class), - @Dependent(name = "tezam-service", type = TezAmServiceDependent.class, - activationCondition = TezAmEnabledCondition.class), - @Dependent(name = "tezam-statefulset", type = TezAmStatefulSetDependent.class, dependsOn = {"hiveserver2-configmap", - "hadoop-configmap", "tezam-service", "scratch-pvc"}, activationCondition = TezAmEnabledCondition.class), - // --- Autoscaling: KEDA ScaledObjects (conditional) --- - @Dependent(name = "hs2-scaledobject", type = HiveServer2ScaledObjectDependent.class, dependsOn = { - "hiveserver2-deployment"}, activationCondition = HiveServer2MetricScalingCondition.class), - @Dependent(name = "hs2-httpso", type = HiveServer2HttpScaledObjectDependent.class, dependsOn = { - "hiveserver2-deployment"}, activationCondition = HiveServer2ScaleToZeroCondition.class), - @Dependent(name = "metastore-scaledobject", type = MetastoreScaledObjectDependent.class, dependsOn = { - "metastore-deployment"}, activationCondition = MetastoreAutoscalingCondition.class), - @Dependent(name = "llap-scaledobject", type = LlapScaledObjectDependent.class, dependsOn = { - "llap-statefulset"}, activationCondition = LlapAutoscalingCondition.class), - @Dependent(name = "tezam-scaledobject", type = TezAmScaledObjectDependent.class, dependsOn = { - "tezam-statefulset"}, activationCondition = TezAmAutoscalingCondition.class), - // --- Autoscaling: PodDisruptionBudgets (conditional) --- - @Dependent(name = "hs2-pdb", type = HiveServer2PdbDependent.class, dependsOn = { - "hiveserver2-deployment"}, activationCondition = HiveServer2AutoscalingCondition.class), - @Dependent(name = "metastore-pdb", type = MetastorePdbDependent.class, dependsOn = { - "metastore-deployment"}, activationCondition = MetastoreAutoscalingCondition.class), - @Dependent(name = "llap-pdb", type = LlapPdbDependent.class, dependsOn = { - "llap-statefulset"}, activationCondition = 
LlapAutoscalingCondition.class), - @Dependent(name = "tezam-pdb", type = TezAmPdbDependent.class, dependsOn = { - "tezam-statefulset"}, activationCondition = TezAmAutoscalingCondition.class)}) public class HiveClusterReconciler implements Reconciler { private static final Logger LOG = LoggerFactory.getLogger(HiveClusterReconciler.class); @Override public UpdateControl reconcile(HiveCluster resource, Context context) { - LOG.debug("Reconciling HiveCluster: {}/{}", resource.getMetadata().getNamespace(), - resource.getMetadata().getName()); + LOG.debug("Reconciling HiveCluster: {}/{} generation={}", + resource.getMetadata().getNamespace(), + resource.getMetadata().getName(), + resource.getMetadata().getGeneration()); HiveClusterStatus existingStatus = resource.getStatus(); HiveClusterStatus newStatus = buildStatus(resource, context, existingStatus); - if (Objects.equals(existingStatus, newStatus)) { + if (statusEqualsIgnoringTimestamps(existingStatus, newStatus)) { return UpdateControl.noUpdate(); } @@ -161,8 +71,8 @@ public UpdateControl reconcile(HiveCluster resource, Context updateErrorStatus(HiveCluster resource, Context context, Exception e) { - LOG.error("Error reconciling HiveCluster: {}/{}", resource.getMetadata().getNamespace(), - resource.getMetadata().getName(), e); + LOG.error("Error reconciling HiveCluster: {}/{} - {}", resource.getMetadata().getNamespace(), + resource.getMetadata().getName(), e.getMessage(), e); HiveClusterStatus status = resource.getStatus() != null ? resource.getStatus() : new HiveClusterStatus(); @@ -312,14 +222,86 @@ private Condition buildCondition(String type, String conditionStatus, condition.setReason(reason); condition.setMessage(message); - // Preserve lastTransitionTime when the condition status has not changed + // Preserve lastTransitionTime from ANY existing condition of this type + // (regardless of status) to avoid generating new timestamps on every + // reconcile which would cause an infinite status-patch loop. 
String preservedTime = existingConditions.stream() - .filter(c -> type.equals(c.getType()) && conditionStatus.equals(c.getStatus())) + .filter(c -> type.equals(c.getType())) .map(Condition::getLastTransitionTime) .findFirst() .orElse(null); - condition.setLastTransitionTime(preservedTime != null ? preservedTime : Instant.now().toString()); + if (preservedTime != null) { + // Only update the timestamp if the status actually changed + String oldStatus = existingConditions.stream() + .filter(c -> type.equals(c.getType())) + .map(Condition::getStatus) + .findFirst() + .orElse(null); + if (conditionStatus.equals(oldStatus)) { + condition.setLastTransitionTime(preservedTime); + } else { + condition.setLastTransitionTime(Instant.now().toString()); + } + } else { + condition.setLastTransitionTime(Instant.now().toString()); + } return condition; } + + /** + * Compares two HiveClusterStatus objects ignoring condition timestamps. + * This prevents infinite reconciliation loops caused by informer cache lag: + * after a status patch, the informer may still have the old status, causing + * the next reconcile to see a "different" status (new timestamp vs old) and + * patch again, perpetuating the loop. 
+   */
+  private boolean statusEqualsIgnoringTimestamps(HiveClusterStatus a, HiveClusterStatus b) {
+    if (a == b) { // same instance, or both null
+      return true;
+    }
+    if (a == null || b == null) { // exactly one side is null at this point
+      return false;
+    }
+    if (!Objects.equals(a.getObservedGeneration(), b.getObservedGeneration())) {
+      return false;
+    }
+    if (!Objects.equals(a.getMetastore(), b.getMetastore())) { // per-component statuses use their own equals()
+      return false;
+    }
+    if (!Objects.equals(a.getHiveServer2(), b.getHiveServer2())) {
+      return false;
+    }
+    if (!Objects.equals(a.getLlap(), b.getLlap())) {
+      return false;
+    }
+    if (!Objects.equals(a.getTezAm(), b.getTezAm())) {
+      return false;
+    }
+    // Compare conditions by type+status+reason+message, ignoring lastTransitionTime
+    return conditionsEqualIgnoringTime(a.getConditions(), b.getConditions());
+  }
+
+  private boolean conditionsEqualIgnoringTime(List a, List b) { // true when both lists match entry by entry
+    if (a == b) { // same list instance, or both null
+      return true;
+    }
+    if (a == null || b == null) { // a null list and an empty list are treated as different
+      return a == null && b == null; // always false here: the both-null case already returned above
+    }
+    if (a.size() != b.size()) {
+      return false;
+    }
+    for (int i = 0; i < a.size(); i++) { // positional: entry i of a is compared with entry i of b
+      Condition ca = a.get(i);
+      Condition cb = b.get(i);
+      if (!Objects.equals(ca.getType(), cb.getType())
+          || !Objects.equals(ca.getStatus(), cb.getStatus())
+          || !Objects.equals(ca.getReason(), cb.getReason())
+          || !Objects.equals(ca.getMessage(), cb.getMessage())) { // lastTransitionTime deliberately not compared
+        return false;
+      }
+    }
+    return true;
+  }
 }
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveWorkflowSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveWorkflowSpec.java
new file mode 100644
index 000000000000..46aa53890573
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveWorkflowSpec.java
@@ -0,0 +1,290 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.reconciler; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import io.fabric8.kubernetes.api.model.apps.Deployment; +import io.javaoperatorsdk.operator.api.config.dependent.DependentResourceSpec; +import io.javaoperatorsdk.operator.api.config.workflow.WorkflowSpec; +import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; +import org.apache.hive.kubernetes.operator.dependent.HiveConfigMapDependent; +import org.apache.hive.kubernetes.operator.dependent.HiveServer2DeploymentDependent; +import org.apache.hive.kubernetes.operator.dependent.HiveServer2InterceptorRouteDependent; +import org.apache.hive.kubernetes.operator.dependent.HivePdbDependent; +import org.apache.hive.kubernetes.operator.dependent.HiveScaledObjectDependent; +import org.apache.hive.kubernetes.operator.dependent.HiveServiceDependent; +import org.apache.hive.kubernetes.operator.dependent.LlapStatefulSetDependent; +import org.apache.hive.kubernetes.operator.dependent.MetastoreDeploymentDependent; +import org.apache.hive.kubernetes.operator.dependent.SchemaInitJobDependent; +import org.apache.hive.kubernetes.operator.dependent.ScratchPvcDependent; +import org.apache.hive.kubernetes.operator.dependent.TezAmStatefulSetDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; + +/** 
+ * Programmatic workflow specification for the Hive Kubernetes Operator.
+ * Replaces the annotation-based {@code @Workflow} on the reconciler with
+ * explicit {@link DependentResourceSpec} entries and inline lambda conditions.
+ * This eliminates 12 single-method condition wrapper classes.
+ */
+public final class HiveWorkflowSpec implements WorkflowSpec {
+
+  private static final Condition METASTORE_ENABLED = // true when spec.metastore is enabled
+      (dr, primary, ctx) -> primary.getSpec().metastore().isEnabled();
+
+  private static final Condition LLAP_ENABLED = // true when spec.llap is enabled
+      (dr, primary, ctx) -> primary.getSpec().llap().isEnabled();
+
+  private static final Condition TEZAM_ENABLED = // true when spec.tezAm is enabled
+      (dr, primary, ctx) -> primary.getSpec().tezAm().isEnabled();
+
+  private static final Condition METASTORE_AUTOSCALING = // component enabled AND its autoscaling enabled
+      (dr, primary, ctx) -> primary.getSpec().metastore().isEnabled()
+          && primary.getSpec().metastore().autoscaling().isEnabled();
+
+  private static final Condition LLAP_AUTOSCALING = // component enabled AND its autoscaling enabled
+      (dr, primary, ctx) -> primary.getSpec().llap().isEnabled()
+          && primary.getSpec().llap().autoscaling().isEnabled();
+
+  private static final Condition TEZAM_AUTOSCALING = // component enabled AND its autoscaling enabled
+      (dr, primary, ctx) -> primary.getSpec().tezAm().isEnabled()
+          && primary.getSpec().tezAm().autoscaling().isEnabled();
+
+  private static final Condition HS2_AUTOSCALING = // only autoscaling.enabled is read; no HS2 enabled flag is consulted
+      (dr, primary, ctx) -> primary.getSpec().hiveServer2().autoscaling().isEnabled();
+
+  private static final Condition HS2_SCALE_TO_ZERO = // HS2 autoscaling enabled AND minReplicas == 0
+      (dr, primary, ctx) -> primary.getSpec().hiveServer2().autoscaling().isEnabled()
+          && primary.getSpec().hiveServer2().autoscaling().minReplicas() == 0;
+
+  // SPECS must be declared AFTER all conditions to avoid static init order issues.
+  private static final List SPECS = buildSpecs(); // built once at class init; exposed via getDependentResourceSpecs()
+
+  @SuppressWarnings({"rawtypes", "unchecked"}) // DependentResourceSpec is instantiated as a raw type below
+  private static List buildSpecs() { // spec args: 4th = ready postcondition, 5th = reconcile precondition
+    List specs = new ArrayList<>(); // 7th arg presumably = activation condition (as in the old @Workflow) - confirm
+
+    // --- ConfigMap dependents ---
+    specs.add(new DependentResourceSpec(
+        HiveConfigMapDependent.Hadoop.class, "hadoop-configmap",
+        Set.of(), null, null, null, null, null)); // unconditional, no dependencies
+
+    specs.add(new DependentResourceSpec(
+        HiveConfigMapDependent.Metastore.class, "metastore-configmap",
+        Set.of(), null, null, null, METASTORE_ENABLED, null));
+
+    specs.add(new DependentResourceSpec(
+        HiveConfigMapDependent.HiveServer2.class, "hiveserver2-configmap",
+        Set.of(), null, null, null, null, null)); // unconditional, no dependencies
+
+    // --- Job dependents ---
+    specs.add(new DependentResourceSpec(
+        SchemaInitJobDependent.class, "schema-init-job",
+        Set.of("metastore-configmap", "hadoop-configmap"),
+        schemaJobCompleted(), null, null, METASTORE_ENABLED, null)); // dependents wait for the Job to succeed
+
+    // --- Deployment dependents ---
+    specs.add(new DependentResourceSpec(
+        MetastoreDeploymentDependent.class, "metastore-deployment",
+        Set.of("schema-init-job"),
+        metastoreReady(), null, null, METASTORE_ENABLED, null)); // dependents wait for ready replicas
+
+    // --- Service dependents ---
+    specs.add(new DependentResourceSpec(
+        HiveServiceDependent.Metastore.class, "metastore-service",
+        Set.of("metastore-configmap"),
+        null, null, null, METASTORE_ENABLED, null));
+
+    specs.add(new DependentResourceSpec( // HS2 Deployment (listed under the Service section)
+        HiveServer2DeploymentDependent.class, "hiveserver2-deployment",
+        Set.of("hiveserver2-configmap", "hadoop-configmap"),
+        null, hs2Precondition(), null, null, null)); // not in dependsOn of metastore; gated by the precondition instead
+
+    specs.add(new DependentResourceSpec(
+        HiveServiceDependent.HiveServer2.class, "hiveserver2-service",
+        Set.of("hiveserver2-configmap"),
+        null, null, null, null, null));
+
+    // --- LLAP (conditional) ---
+    specs.add(new DependentResourceSpec(
+        HiveConfigMapDependent.Llap.class, "llap-configmap",
+        Set.of(), null, null, null, LLAP_ENABLED, null));
+
+    specs.add(new DependentResourceSpec(
+        LlapStatefulSetDependent.class, "llap-statefulset",
+        Set.of("llap-configmap", "hadoop-configmap"),
+        null, null, null, LLAP_ENABLED, null));
+
+    specs.add(new DependentResourceSpec(
+        HiveServiceDependent.Llap.class, "llap-service",
+        Set.of(), null, null, null, LLAP_ENABLED, null));
+
+    // --- TezAM (conditional) ---
+    specs.add(new DependentResourceSpec(
+        ScratchPvcDependent.class, "scratch-pvc",
+        Set.of(), null, null, null, TEZAM_ENABLED, null));
+
+    specs.add(new DependentResourceSpec(
+        HiveServiceDependent.TezAm.class, "tezam-service",
+        Set.of(), null, null, null, TEZAM_ENABLED, null));
+
+    specs.add(new DependentResourceSpec(
+        TezAmStatefulSetDependent.class, "tezam-statefulset",
+        Set.of("hiveserver2-configmap", "hadoop-configmap", "tezam-service", "scratch-pvc"),
+        null, null, null, TEZAM_ENABLED, null));
+
+    specs.add(new DependentResourceSpec( // --- Autoscaling: KEDA ScaledObjects (conditional) ---
+        HiveScaledObjectDependent.HiveServer2.class, "hs2-scaledobject",
+        Set.of("hiveserver2-deployment"),
+        null, HS2_AUTOSCALING, null, null, null)); // NOTE(review): 5th slot (precondition), old code used activation
+
+    specs.add(new DependentResourceSpec(
+        HiveServer2InterceptorRouteDependent.class, "hs2-interceptor-route",
+        Set.of("hiveserver2-deployment"),
+        null, HS2_SCALE_TO_ZERO, null, null, null)); // only when HS2 may scale to zero replicas
+
+    specs.add(new DependentResourceSpec(
+        HiveScaledObjectDependent.Metastore.class, "metastore-scaledobject",
+        Set.of("metastore-deployment"),
+        null, METASTORE_AUTOSCALING, null, null, null));
+
+    specs.add(new DependentResourceSpec(
+        HiveScaledObjectDependent.Llap.class, "llap-scaledobject",
+        Set.of("llap-statefulset", "hs2-scaledobject"), // NOTE(review): new dep on hs2-scaledobject - confirm intent
+        null, LLAP_AUTOSCALING, null, null, null));
+
+    specs.add(new DependentResourceSpec(
+        HiveScaledObjectDependent.TezAm.class, "tezam-scaledobject",
+        Set.of("tezam-statefulset", "hs2-scaledobject"), // NOTE(review): new dep on hs2-scaledobject - confirm intent
+        null, TEZAM_AUTOSCALING, null, null, null));
+
+    // --- Autoscaling: PodDisruptionBudgets (conditional) ---
+    specs.add(new DependentResourceSpec(
+        HivePdbDependent.HiveServer2.class, "hs2-pdb",
+        Set.of("hiveserver2-deployment"),
+        null, HS2_AUTOSCALING, null, null, null));
+
+    specs.add(new DependentResourceSpec(
+        HivePdbDependent.Metastore.class, "metastore-pdb",
+        Set.of("metastore-deployment"),
+        null, METASTORE_AUTOSCALING, null, null, null));
+
+    specs.add(new DependentResourceSpec(
+        HivePdbDependent.Llap.class, "llap-pdb",
+        Set.of("llap-statefulset"),
+        null, LLAP_AUTOSCALING, null, null, null));
+
+    specs.add(new DependentResourceSpec(
+        HivePdbDependent.TezAm.class, "tezam-pdb",
+        Set.of("tezam-statefulset"),
+        null, TEZAM_AUTOSCALING, null, null, null));
+
+    return Collections.unmodifiableList(specs); // callers get a read-only view
+  }
+
+  /**
+   * Ready postcondition: schema initialization Job must complete successfully
+   * before the Metastore Deployment is created.
+   */
+  private static Condition schemaJobCompleted() {
+    return (dependentResource, primary, context) -> {
+      if (!primary.getSpec().metastore().isEnabled()) { // Metastore not managed: nothing to wait for
+        return true;
+      }
+      return dependentResource.getSecondaryResource(primary, context)
+          .map(job -> {
+            var j = (io.fabric8.kubernetes.api.model.batch.v1.Job) job; // cast needed: the condition is untyped here
+            return j.getStatus() != null
+                && j.getStatus().getSucceeded() != null
+                && j.getStatus().getSucceeded() >= 1; // at least one successful completion
+          })
+          .orElse(false); // Job not observed yet counts as not completed
+    };
+  }
+
+  /**
+   * Ready postcondition: Metastore Deployment must have the desired number
+   * of ready replicas before downstream dependents proceed.
+   */
+  private static Condition metastoreReady() {
+    return (dependentResource, primary, context) -> {
+      if (!primary.getSpec().metastore().isEnabled()) { // Metastore not managed: treated as ready
+        return true;
+      }
+      int desiredReplicas; // required ready count: max(1, minReplicas) when autoscaled, else spec replicas
+      if (primary.getSpec().metastore().autoscaling().isEnabled()) {
+        desiredReplicas = Math.max(1, primary.getSpec().metastore().autoscaling().minReplicas());
+      } else {
+        desiredReplicas = primary.getSpec().metastore().replicas();
+      }
+      return dependentResource.getSecondaryResource(primary, context)
+          .map(resource -> {
+            var deployment = (Deployment) resource; // cast needed: the condition is untyped here
+            return deployment.getStatus() != null
+                && deployment.getStatus().getReadyReplicas() != null
+                && deployment.getStatus().getReadyReplicas() >= desiredReplicas;
+          })
+          .orElse(false); // Deployment not observed yet counts as not ready
+    };
+  }
+
+  /**
+   * Reconcile precondition for HiveServer2: if Metastore is managed,
+   * wait for it to be ready before reconciling HS2.
+   */
+  private static Condition hs2Precondition() {
+    return (dependentResource, primary, context) -> {
+      if (!primary.getSpec().metastore().isEnabled()) { // Metastore not managed: HS2 may proceed immediately
+        return true;
+      }
+      int desiredReplicas; // same computation as metastoreReady(); keep the two in sync
+      if (primary.getSpec().metastore().autoscaling().isEnabled()) {
+        desiredReplicas = Math.max(1, primary.getSpec().metastore().autoscaling().minReplicas());
+      } else {
+        desiredReplicas = primary.getSpec().metastore().replicas();
+      }
+      return context.getSecondaryResources(Deployment.class).stream() // all Deployments known for this primary
+          .filter(d -> d.getMetadata().getName().equals( // select the one named "<cluster name>-metastore"
+              primary.getMetadata().getName() + "-metastore"))
+          .findFirst()
+          .map(deployment -> deployment.getStatus() != null
+              && deployment.getStatus().getReadyReplicas() != null
+              && deployment.getStatus().getReadyReplicas() >= desiredReplicas)
+          .orElse(false); // no matching Deployment yet: precondition not met
+    };
+  }
+
+  @Override
+  public List getDependentResourceSpecs() {
+    return SPECS; // the unmodifiable list built by buildSpecs()
+  }
+
+  @Override
+  public boolean isExplicitInvocation() {
+    return false; // presumably: the SDK runs the workflow itself rather than the reconciler - confirm in JOSDK docs
+  }
+
+  @Override
+  public boolean handleExceptionsInReconciler() {
+    return true; // presumably: workflow errors are surfaced to the reconciler instead of thrown - confirm in JOSDK docs
+  }
+}
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java index 35553d9cb445..85e6882d4d68 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java @@ -54,7 +54,6 @@ */ class TestVectorMapJoinOuterGenerateResultOperator { - /** Concrete subclass that exposes the generateOuterNulls* methods to tests. */ private static final class TestableOuterOp extends VectorMapJoinOuterGenerateResultOperator { @Override protected String getLoggingPrefix() { From 289c2b426acb0cb0cbc5e354a44e2788bc1cb92e Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Thu, 28 May 2026 19:36:45 +0530 Subject: [PATCH 3/4] Fix CPU Utilization Scaling --- packaging/src/kubernetes/README.md | 79 +++++++++------- .../crds/hiveclusters.hive.apache.org-v1.yml | 72 ++++++++++----- .../hive-operator/templates/hivecluster.yaml | 18 ++++ .../kubernetes/helm/hive-operator/values.yaml | 9 +- .../dependent/HiveScaledObjectDependent.java | 89 ++++++++++++------- .../dependent/HiveServiceDependent.java | 17 ++-- .../operator/model/spec/AutoscalingSpec.java | 13 ++- .../operator/model/spec/HiveServer2Spec.java | 2 +- .../operator/model/spec/LlapSpec.java | 2 +- .../operator/model/spec/MetastoreSpec.java | 2 +- .../operator/model/spec/TezAmSpec.java | 2 +- 11 files changed, 203 insertions(+), 102 deletions(-) diff --git a/packaging/src/kubernetes/README.md b/packaging/src/kubernetes/README.md index ac2a4d0e9584..afe55dbe9139 100644 --- a/packaging/src/kubernetes/README.md +++ b/packaging/src/kubernetes/README.md @@ -647,7 +647,7 @@ without creating a conflicting second ScaledObject. 
The HS2 ScaledObject combines three trigger types in a single resource: - **Prometheus trigger** (`hs2_open_sessions`) — session-aware scaling -- **CPU trigger** — load-based scaling when resources are configured +- **CPU trigger** (`AverageValue` in millicores) — load-based scaling when `targetCpuValue` is configured - **external-push trigger** — wake-from-zero via the KEDA HTTP Add-on interceptor The `InterceptorRoute` CRD (`http.keda.sh/v1beta1`) configures only the interceptor @@ -791,69 +791,82 @@ When autoscaling is enabled, the operator automatically: | **LLAP** | `hadoop_llapdaemon_executornumqueuedrequests`, `hadoop_llapdaemon_executornumexecutorsconfigured`, `hadoop_llapdaemon_executornumexecutorsavailable` | Total busy slots = queued + configured - available (scaling trigger) | | **Tez AM** | Standard K8s CPU metrics or `tez_session_pending_tasks` (from HS2) | CPU utilization or pending task count (scaling trigger) | -### CPU-Based Scaling and Resource Requests +### CPU-Based Scaling -The operator includes a **CPU utilization trigger** in the ScaledObject for HS2, Metastore, -and Tez AM. KEDA's CPU trigger uses the `Utilization` metric type, which is defined as a -percentage of the container's CPU request. This means **the container must have a CPU request -defined** for the trigger to work. +The operator can include a **CPU trigger** in the ScaledObject for HS2, Metastore, and Tez AM. +The trigger uses KEDA's `AverageValue` metric type with **absolute millicore targets** that +you specify directly. This handles burstable QoS pods correctly — unlike `Utilization` +(which measures against the CPU request), `AverageValue` uses actual CPU consumption in +absolute terms, so pods with a small request but high limit won't show perpetual >100% +utilization that prevents scale-down. + +**The CPU trigger is opt-in:** it is only added to the ScaledObject when you explicitly set +both `targetCpuValue` and `activationCpuValue` in the autoscaling config. 
If omitted, the +operator relies solely on the Prometheus-based trigger (sessions, connections, etc.). **How it works:** -- The CPU trigger scales up when pod CPU utilization exceeds `scaleUpThreshold`% of the CPU request -- The `scaleDownThreshold` configures the **activation threshold** — below this CPU%, the - trigger is completely inactive (doesn't participate in scaling decisions) -- Both the CPU trigger and the Prometheus-based trigger (sessions/connections) are evaluated - independently — if **either** exceeds its threshold, the component scales up (OR logic) -- Scale-down only happens when **both** triggers agree load is low (all below threshold) +- `targetCpuValue` — the average CPU per pod (e.g., `"1500m"` or `"1"`) that triggers scale-up +- `activationCpuValue` — below this CPU value, the trigger is completely inactive + (doesn't participate in scaling decisions at all) +- Both the CPU trigger and the Prometheus-based trigger are evaluated independently — + if **either** exceeds its threshold, the component scales up (OR logic) +- Scale-down only happens when **both** triggers agree load is low +- The component must also have `resources` defined on its pods; if both CPU values are set + but `resources` is missing, the operator logs a warning and skips the CPU trigger -This means a long-running CPU-intensive query will keep the pod scaled even if there's -only one session open. Conversely, many idle sessions will keep it scaled even at low CPU. +**Example:** With `targetCpuValue: "1600m"` and `activationCpuValue: "400m"`, KEDA scales up +when average pod CPU exceeds 1600m and considers the trigger inactive below 400m. -If you enable autoscaling without setting `resources` for that component, the operator -will omit the CPU trigger and rely solely on the Prometheus-based trigger.
For Tez AM -specifically, without CPU resources the operator uses `tez_session_pending_tasks` (queued -tasks waiting for AM slots) as the proportional scaler — this reflects real query demand -rather than connection count, avoiding spurious scale-ups from idle or zombie sessions. +For Tez AM specifically, without CPU targets the operator uses `tez_session_pending_tasks` +(queued tasks waiting for AM slots) as the proportional scaler — this reflects real query +demand rather than connection count, avoiding spurious scale-ups from idle sessions. -To get both Prometheus and CPU-based scaling, set `resources` on the component: +To enable both Prometheus and CPU-based scaling: ```yaml cluster: hiveServer2: resources: - requestsCpu: "1" # Required for CPU-based autoscaling + requestsCpu: "500m" + limitsCpu: "2" requestsMemory: "2Gi" autoscaling: enabled: true - scaleDownThreshold: 30 # CPU trigger inactive below 30% (default) + scaleUpThreshold: 1 # scale up when avg sessions > 1 per pod + targetCpuValue: "1600m" # scale up when avg CPU > 1600m per pod + activationCpuValue: "400m" # CPU trigger inactive below 400m metastore: resources: - requestsCpu: "500m" # Required for CPU-based autoscaling + requestsCpu: "500m" + limitsCpu: "1" requestsMemory: "1Gi" autoscaling: enabled: true + targetCpuValue: "750m" + activationCpuValue: "200m" tezAm: resources: - requestsCpu: "500m" # Required for CPU-based autoscaling + requestsCpu: "250m" + limitsCpu: "1" requestsMemory: "1Gi" autoscaling: enabled: true - scaleUpThreshold: 60 # For TezAM with resources, this IS the CPU target % - scaleDownThreshold: 10 # CPU trigger inactive below 10% + targetCpuValue: "600m" + activationCpuValue: "100m" ``` | Setting | Effect on CPU trigger | |---------|----------------------| -| `resources.requestsCpu` | **Enables** the CPU trigger (required) | -| `scaleUpThreshold` | CPU target % — scales up when utilization exceeds this (default 80) | -| `scaleDownThreshold` | Activation value — CPU trigger 
ignored below this % (default 30) | +| `targetCpuValue` | Absolute CPU target (e.g., `"1500m"` or `"1"`). **Required** to enable CPU trigger. | +| `activationCpuValue` | CPU below which trigger is inactive. **Required** with targetCpuValue. | +| `resources` | Pod resources must be defined — operator warns and skips CPU trigger otherwise. | > **Note:** LLAP scaling uses only Prometheus triggers (total busy slots) -> and does not include a CPU trigger, so LLAP does not require `resources` to -> be set for autoscaling to work. +> and does not include a CPU trigger, so LLAP does not require `targetCpuValue` +> for autoscaling to work. ### Helm Values Reference (Autoscaling) @@ -1073,7 +1086,9 @@ kubectl exec -it deployment/hive-hiveserver2 -- beeline -u "jdbc:hive2://hive-hi | `cluster..autoscaling.enabled` | `false` | Enable KEDA-based autoscaling for this component | | `cluster..autoscaling.minReplicas` | `0` | Floor replica count. 0 enables scale-to-zero (HS2 requires KEDA HTTP Add-on) | | `cluster..autoscaling.scaleUpThreshold` | `80` | Metric threshold triggering scale-up (sessions for HS2, connections for HMS, busy slots for LLAP, pending tasks or CPU% for TezAM) | -| `cluster..autoscaling.scaleDownThreshold` | `30` | CPU activation threshold below which the CPU trigger is inactive | +| `cluster..autoscaling.scaleDownThreshold` | `30` | Prometheus metric threshold for scale-down (component-specific) | +| `cluster..autoscaling.targetCpuValue` | — | Absolute CPU target for scale-up (e.g., `1500m`). Omit to disable CPU trigger. | +| `cluster..autoscaling.activationCpuValue` | — | CPU value below which CPU trigger is inactive. Required with targetCpuValue. 
| | `cluster..autoscaling.cooldownSeconds` | `300` | Seconds to wait after last scale event before scaling down again | | `cluster..autoscaling.gracePeriodSeconds` | `60-600` | Max time (seconds) to wait for graceful drain before forced termination | diff --git a/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml b/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml index 6fc5916b84dd..0d12f5dd6248 100644 --- a/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml +++ b/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml @@ -48,6 +48,10 @@ spec: description: Autoscaling configuration (requires KEDA installed in the cluster) properties: + activationCpuValue: + description: CPU average value below which the trigger is + inactive. Required if targetCpuValue is set. + type: string cooldownSeconds: default: 600 description: Cooldown period in seconds after a scaling event @@ -70,17 +74,19 @@ spec: type: integer scaleDownThreshold: default: 20 - description: "Percentage threshold that triggers scale-down\ - \ (all conditions must be met: metric below threshold AND\ - \ CPU below threshold)" + description: Threshold that triggers scale-down for Prometheus-based + metrics type: integer scaleUpThreshold: default: 80 description: "Threshold that triggers scale-up (component-specific:\ - \ sessions for HS2, queue depth for LLAP, CPU% for TezAM\ - \ with resources, pending tasks per AM for TezAM without\ - \ resources)" + \ sessions for HS2, connections for HMS, queue depth for\ + \ LLAP, pending tasks for TezAM)" type: integer + targetCpuValue: + description: "Target CPU average value for scaling (e.g.,\ + \ '1500m' or '1'). If omitted, CPU scaling is disabled." 
+ type: string type: object configOverrides: additionalProperties: @@ -194,6 +200,10 @@ spec: description: Autoscaling configuration (requires KEDA installed in the cluster) properties: + activationCpuValue: + description: CPU average value below which the trigger is + inactive. Required if targetCpuValue is set. + type: string cooldownSeconds: default: 600 description: Cooldown period in seconds after a scaling event @@ -216,17 +226,19 @@ spec: type: integer scaleDownThreshold: default: 20 - description: "Percentage threshold that triggers scale-down\ - \ (all conditions must be met: metric below threshold AND\ - \ CPU below threshold)" + description: Threshold that triggers scale-down for Prometheus-based + metrics type: integer scaleUpThreshold: default: 80 description: "Threshold that triggers scale-up (component-specific:\ - \ sessions for HS2, queue depth for LLAP, CPU% for TezAM\ - \ with resources, pending tasks per AM for TezAM without\ - \ resources)" + \ sessions for HS2, connections for HMS, queue depth for\ + \ LLAP, pending tasks for TezAM)" type: integer + targetCpuValue: + description: "Target CPU average value for scaling (e.g.,\ + \ '1500m' or '1'). If omitted, CPU scaling is disabled." + type: string type: object configOverrides: additionalProperties: @@ -315,6 +327,10 @@ spec: description: Autoscaling configuration (requires KEDA installed in the cluster) properties: + activationCpuValue: + description: CPU average value below which the trigger is + inactive. Required if targetCpuValue is set. 
+ type: string cooldownSeconds: default: 600 description: Cooldown period in seconds after a scaling event @@ -337,17 +353,19 @@ spec: type: integer scaleDownThreshold: default: 20 - description: "Percentage threshold that triggers scale-down\ - \ (all conditions must be met: metric below threshold AND\ - \ CPU below threshold)" + description: Threshold that triggers scale-down for Prometheus-based + metrics type: integer scaleUpThreshold: default: 80 description: "Threshold that triggers scale-up (component-specific:\ - \ sessions for HS2, queue depth for LLAP, CPU% for TezAM\ - \ with resources, pending tasks per AM for TezAM without\ - \ resources)" + \ sessions for HS2, connections for HMS, queue depth for\ + \ LLAP, pending tasks for TezAM)" type: integer + targetCpuValue: + description: "Target CPU average value for scaling (e.g.,\ + \ '1500m' or '1'). If omitted, CPU scaling is disabled." + type: string type: object configOverrides: additionalProperties: @@ -489,6 +507,10 @@ spec: description: Autoscaling configuration (requires KEDA installed in the cluster) properties: + activationCpuValue: + description: CPU average value below which the trigger is + inactive. Required if targetCpuValue is set. 
+ type: string cooldownSeconds: default: 600 description: Cooldown period in seconds after a scaling event @@ -511,17 +533,19 @@ spec: type: integer scaleDownThreshold: default: 20 - description: "Percentage threshold that triggers scale-down\ - \ (all conditions must be met: metric below threshold AND\ - \ CPU below threshold)" + description: Threshold that triggers scale-down for Prometheus-based + metrics type: integer scaleUpThreshold: default: 80 description: "Threshold that triggers scale-up (component-specific:\ - \ sessions for HS2, queue depth for LLAP, CPU% for TezAM\ - \ with resources, pending tasks per AM for TezAM without\ - \ resources)" + \ sessions for HS2, connections for HMS, queue depth for\ + \ LLAP, pending tasks for TezAM)" type: integer + targetCpuValue: + description: "Target CPU average value for scaling (e.g.,\ + \ '1500m' or '1'). If omitted, CPU scaling is disabled." + type: string type: object configOverrides: additionalProperties: diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml index c490b1f5d333..cbfe053a0e48 100644 --- a/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml +++ b/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml @@ -73,6 +73,12 @@ spec: minReplicas: {{ .Values.cluster.metastore.autoscaling.minReplicas }} scaleUpThreshold: {{ .Values.cluster.metastore.autoscaling.scaleUpThreshold }} scaleDownThreshold: {{ .Values.cluster.metastore.autoscaling.scaleDownThreshold }} + {{- if .Values.cluster.metastore.autoscaling.targetCpuValue }} + targetCpuValue: {{ .Values.cluster.metastore.autoscaling.targetCpuValue | quote }} + {{- end }} + {{- if .Values.cluster.metastore.autoscaling.activationCpuValue }} + activationCpuValue: {{ .Values.cluster.metastore.autoscaling.activationCpuValue | quote }} + {{- end }} cooldownSeconds: {{ .Values.cluster.metastore.autoscaling.cooldownSeconds }} 
gracePeriodSeconds: {{ .Values.cluster.metastore.autoscaling.gracePeriodSeconds }} {{- end }} @@ -111,6 +117,12 @@ spec: minReplicas: {{ .Values.cluster.hiveServer2.autoscaling.minReplicas }} scaleUpThreshold: {{ .Values.cluster.hiveServer2.autoscaling.scaleUpThreshold }} scaleDownThreshold: {{ .Values.cluster.hiveServer2.autoscaling.scaleDownThreshold }} + {{- if .Values.cluster.hiveServer2.autoscaling.targetCpuValue }} + targetCpuValue: {{ .Values.cluster.hiveServer2.autoscaling.targetCpuValue | quote }} + {{- end }} + {{- if .Values.cluster.hiveServer2.autoscaling.activationCpuValue }} + activationCpuValue: {{ .Values.cluster.hiveServer2.autoscaling.activationCpuValue | quote }} + {{- end }} cooldownSeconds: {{ .Values.cluster.hiveServer2.autoscaling.cooldownSeconds }} gracePeriodSeconds: {{ .Values.cluster.hiveServer2.autoscaling.gracePeriodSeconds }} {{- end }} @@ -179,6 +191,12 @@ spec: minReplicas: {{ .Values.cluster.tezAm.autoscaling.minReplicas }} scaleUpThreshold: {{ .Values.cluster.tezAm.autoscaling.scaleUpThreshold }} scaleDownThreshold: {{ .Values.cluster.tezAm.autoscaling.scaleDownThreshold }} + {{- if .Values.cluster.tezAm.autoscaling.targetCpuValue }} + targetCpuValue: {{ .Values.cluster.tezAm.autoscaling.targetCpuValue | quote }} + {{- end }} + {{- if .Values.cluster.tezAm.autoscaling.activationCpuValue }} + activationCpuValue: {{ .Values.cluster.tezAm.autoscaling.activationCpuValue | quote }} + {{- end }} cooldownSeconds: {{ .Values.cluster.tezAm.autoscaling.cooldownSeconds }} gracePeriodSeconds: {{ .Values.cluster.tezAm.autoscaling.gracePeriodSeconds }} {{- end }} diff --git a/packaging/src/kubernetes/helm/hive-operator/values.yaml b/packaging/src/kubernetes/helm/hive-operator/values.yaml index a0823f90de0d..a16a17e320e6 100644 --- a/packaging/src/kubernetes/helm/hive-operator/values.yaml +++ b/packaging/src/kubernetes/helm/hive-operator/values.yaml @@ -119,6 +119,8 @@ cluster: minReplicas: 1 scaleUpThreshold: 75 scaleDownThreshold: 30 + # 
targetCpuValue: "750m" # Uncomment to enable CPU-based scaling (AverageValue) + # activationCpuValue: "200m" # CPU trigger inactive below this value cooldownSeconds: 300 gracePeriodSeconds: 60 # Set to use an external Metastore instead of deploying one: @@ -144,6 +146,8 @@ cluster: minReplicas: 0 scaleUpThreshold: 80 scaleDownThreshold: 20 + # targetCpuValue: "1600m" # Uncomment to enable CPU-based scaling (AverageValue) + # activationCpuValue: "400m" # CPU trigger inactive below this value cooldownSeconds: 600 gracePeriodSeconds: 300 @@ -186,12 +190,13 @@ cluster: # Autoscaling (requires KEDA + Prometheus in the cluster) # minReplicas: 0 enables scale-to-zero — wakes when HS2 receives queries # When enabled, 'replicas' above acts as the max replica ceiling - # scaleUpThreshold: with CPU resources set → CPU% (e.g., 60 = 60% utilization); - # without CPU resources → pending tasks per AM (e.g., 5 = scale when 5+ tasks waiting) + # scaleUpThreshold: pending tasks per AM (e.g., 5 = scale when 5+ tasks waiting) autoscaling: enabled: false minReplicas: 0 scaleUpThreshold: 5 scaleDownThreshold: 10 + # targetCpuValue: "600m" # Uncomment to enable CPU-based scaling (AverageValue) + # activationCpuValue: "100m" # CPU trigger inactive below this value cooldownSeconds: 600 gracePeriodSeconds: 120 diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveScaledObjectDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveScaledObjectDependent.java index f5efb1302bd3..9e9c782e729e 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveScaledObjectDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveScaledObjectDependent.java @@ -30,6 +30,8 @@ import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; import 
org.apache.hive.kubernetes.operator.util.Labels; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Unified KEDA ScaledObject dependent resource for metric-based autoscaling. @@ -41,6 +43,8 @@ */ public abstract class HiveScaledObjectDependent extends HiveGenericDependentResource { + private static final Logger LOG = LoggerFactory.getLogger(HiveScaledObjectDependent.class); + private final String component; private final String targetKind; @@ -113,7 +117,9 @@ protected abstract List> getTriggers( * HiveServer2 ScaledObject: scales on hs2_active_sessions + CPU. */ public static class HiveServer2 extends HiveScaledObjectDependent { - public HiveServer2() { super("hiveserver2", "Deployment"); } + public HiveServer2() { + super("hiveserver2", "Deployment"); + } @Override protected AutoscalingSpec getAutoscalingSpec(HiveCluster hiveCluster) { @@ -153,15 +159,20 @@ protected List> getTriggers( "activationThreshold", "0" ) )); - if (hiveCluster.getSpec().hiveServer2().resources() != null) { - triggers.add(Map.of( - "type", "cpu", - "metricType", "Utilization", - "metadata", Map.of( - "value", String.valueOf(autoscaling.scaleUpThreshold()), - "activationValue", String.valueOf(autoscaling.scaleDownThreshold()) - ) - )); + if (autoscaling.targetCpuValue() != null && autoscaling.activationCpuValue() != null) { + if (hiveCluster.getSpec().hiveServer2().resources() != null) { + triggers.add(Map.of( + "type", "cpu", + "metricType", "AverageValue", + "metadata", Map.of( + "value", autoscaling.targetCpuValue(), + "activationValue", autoscaling.activationCpuValue() + ) + )); + } else { + LOG.warn("targetCpuValue is set for HiveServer2, but no pod resources are defined. " + + "Skipping CPU trigger to prevent erratic scaling."); + } } // When scale-to-zero is enabled, add KEDA HTTP Add-on external-push // trigger to wake HS2 from 0 when requests arrive at the interceptor. 
@@ -184,7 +195,9 @@ protected List> getTriggers( * Metastore ScaledObject: scales on open_connections + CPU. */ public static class Metastore extends HiveScaledObjectDependent { - public Metastore() { super("metastore", "Deployment"); } + public Metastore() { + super("metastore", "Deployment"); + } @Override protected AutoscalingSpec getAutoscalingSpec(HiveCluster hiveCluster) { @@ -224,15 +237,20 @@ protected List> getTriggers( "activationThreshold", "0" ) )); - if (hiveCluster.getSpec().metastore().resources() != null) { - triggers.add(Map.of( - "type", "cpu", - "metricType", "Utilization", - "metadata", Map.of( - "value", String.valueOf(autoscaling.scaleUpThreshold()), - "activationValue", String.valueOf(autoscaling.scaleDownThreshold()) - ) - )); + if (autoscaling.targetCpuValue() != null && autoscaling.activationCpuValue() != null) { + if (hiveCluster.getSpec().metastore().resources() != null) { + triggers.add(Map.of( + "type", "cpu", + "metricType", "AverageValue", + "metadata", Map.of( + "value", autoscaling.targetCpuValue(), + "activationValue", autoscaling.activationCpuValue() + ) + )); + } else { + LOG.warn("targetCpuValue is set for Metastore, but no pod resources are defined. " + + "Skipping CPU trigger to prevent erratic scaling."); + } } return triggers; } @@ -243,7 +261,9 @@ protected List> getTriggers( * Scale-down is slow (preserves in-memory cache). */ public static class Llap extends HiveScaledObjectDependent { - public Llap() { super("llap", "StatefulSet"); } + public Llap() { + super("llap", "StatefulSet"); + } @Override protected AutoscalingSpec getAutoscalingSpec(HiveCluster hiveCluster) { @@ -299,7 +319,9 @@ protected List> getTriggers( * Tez AMs run in a warm pool; claimed AMs consume CPU, idle ones do not. 
*/ public static class TezAm extends HiveScaledObjectDependent { - public TezAm() { super("tezam", "StatefulSet"); } + public TezAm() { + super("tezam", "StatefulSet"); + } @Override protected AutoscalingSpec getAutoscalingSpec(HiveCluster hiveCluster) { @@ -329,15 +351,20 @@ protected List> getTriggers( String hs2TargetName = hiveCluster.getMetadata().getName() + "-hiveserver2"; String namespace = hiveCluster.getMetadata().getNamespace(); List> triggers = new ArrayList<>(); - if (hiveCluster.getSpec().tezAm().resources() != null) { - triggers.add(Map.of( - "type", "cpu", - "metricType", "Utilization", - "metadata", Map.of( - "value", String.valueOf(autoscaling.scaleUpThreshold()), - "activationValue", String.valueOf(autoscaling.scaleDownThreshold()) - ) - )); + if (autoscaling.targetCpuValue() != null && autoscaling.activationCpuValue() != null) { + if (hiveCluster.getSpec().tezAm().resources() != null) { + triggers.add(Map.of( + "type", "cpu", + "metricType", "AverageValue", + "metadata", Map.of( + "value", autoscaling.targetCpuValue(), + "activationValue", autoscaling.activationCpuValue() + ) + )); + } else { + LOG.warn("targetCpuValue is set for TezAM, but no pod resources are defined. 
" + + "Skipping CPU trigger to prevent erratic scaling."); + } triggers.add(buildHs2ActivationTrigger(namespace, hs2TargetName, maxReplicas)); } else { triggers.add(Map.of( diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServiceDependent.java index f3a064f48b95..a191be2eceb8 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServiceDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServiceDependent.java @@ -25,7 +25,6 @@ import io.javaoperatorsdk.operator.api.config.informer.Informer; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; import org.apache.hive.kubernetes.operator.util.ConfigUtils; import org.apache.hive.kubernetes.operator.util.Labels; @@ -74,7 +73,9 @@ protected Service desired(HiveCluster hiveCluster, + "app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public static class HiveServer2 extends HiveServiceDependent { - public HiveServer2() { super("hiveserver2"); } + public HiveServer2() { + super("hiveserver2"); + } @Override protected void customizeSpec(ServiceBuilder builder, HiveCluster hiveCluster) { @@ -106,7 +107,9 @@ protected void customizeSpec(ServiceBuilder builder, HiveCluster hiveCluster) { + "app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public static class Metastore extends HiveServiceDependent { - public Metastore() { super("metastore"); } + public Metastore() { + super("metastore"); + } @Override protected void customizeSpec(ServiceBuilder builder, HiveCluster hiveCluster) { @@ -131,7 +134,9 @@ protected void customizeSpec(ServiceBuilder builder, HiveCluster hiveCluster) { + 
"app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public static class Llap extends HiveServiceDependent { - public Llap() { super("llap"); } + public Llap() { + super("llap"); + } @Override protected void customizeSpec(ServiceBuilder builder, HiveCluster hiveCluster) { @@ -153,7 +158,9 @@ protected void customizeSpec(ServiceBuilder builder, HiveCluster hiveCluster) { + "app.kubernetes.io/managed-by=hive-kubernetes-operator") ) public static class TezAm extends HiveServiceDependent { - public TezAm() { super("tezam"); } + public TezAm() { + super("tezam"); + } @Override protected void customizeSpec(ServiceBuilder builder, HiveCluster hiveCluster) { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/AutoscalingSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/AutoscalingSpec.java index eb0980fb1a1d..388515a51eb8 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/AutoscalingSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/AutoscalingSpec.java @@ -31,14 +31,19 @@ public record AutoscalingSpec( @Default("0") Integer minReplicas, @JsonPropertyDescription("Threshold that triggers scale-up (component-specific: " - + "sessions for HS2, queue depth for LLAP, CPU% for TezAM with resources, " - + "pending tasks per AM for TezAM without resources)") + + "sessions for HS2, connections for HMS, queue depth for LLAP, " + + "pending tasks for TezAM)") @Default("80") Integer scaleUpThreshold, - @JsonPropertyDescription("Percentage threshold that triggers scale-down " - + "(all conditions must be met: metric below threshold AND CPU below threshold)") + @JsonPropertyDescription("Threshold that triggers scale-down for Prometheus-based metrics") @Default("20") Integer scaleDownThreshold, + @JsonPropertyDescription("Target CPU average value for scaling (e.g., '1500m' or '1'). 
" + + "If omitted, CPU scaling is disabled.") + String targetCpuValue, + @JsonPropertyDescription("CPU average value below which the trigger is inactive. " + + "Required if targetCpuValue is set.") + String activationCpuValue, @JsonPropertyDescription("Cooldown period in seconds after a scaling event before another can occur") @Default("600") Integer cooldownSeconds, diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java index 993b452ba4b4..89c629ecdea2 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java @@ -62,6 +62,6 @@ public record HiveServer2Spec( extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); externalJars = externalJars != null ? externalJars : List.of(); autoscaling = autoscaling != null ? autoscaling : new AutoscalingSpec( - false, 0, 80, 20, 600, 300); + false, 0, 80, 20, null, null, 600, 300); } } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java index 34cfc872f189..7a1951407afc 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java @@ -68,7 +68,7 @@ public record LlapSpec( extraVolumes = extraVolumes != null ? extraVolumes : List.of(); extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); autoscaling = autoscaling != null ? 
autoscaling : new AutoscalingSpec( - false, 0, 1, 0, 900, 600); + false, 0, 1, 0, null, null, 900, 600); } public boolean isEnabled() { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java index 51dd6dea7259..f744b05b72f1 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java @@ -69,7 +69,7 @@ public record MetastoreSpec( extraVolumes = extraVolumes != null ? extraVolumes : List.of(); extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); autoscaling = autoscaling != null ? autoscaling : new AutoscalingSpec( - false, 1, 75, 30, 300, 60); + false, 1, 75, 30, null, null, 300, 60); } public boolean isEnabled() { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java index 6a0008b9147d..9827b7a7d733 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java @@ -63,7 +63,7 @@ public record TezAmSpec( extraVolumes = extraVolumes != null ? extraVolumes : List.of(); extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); autoscaling = autoscaling != null ? 
autoscaling : new AutoscalingSpec( - false, 0, 5, 10, 600, 120); + false, 0, 5, 10, null, null, 600, 120); } public boolean isEnabled() { From 9245804749c7932fae8947d747ad188645fb6845 Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Thu, 28 May 2026 23:18:21 +0530 Subject: [PATCH 4/4] Fix HS2 Scaling Down --- .../operator/dependent/HiveScaledObjectDependent.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveScaledObjectDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveScaledObjectDependent.java index 9e9c782e729e..555a6bec9c82 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveScaledObjectDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveScaledObjectDependent.java @@ -67,12 +67,8 @@ protected GenericKubernetesResource desired(HiveCluster hiveCluster, "kind", targetKind, "name", targetName )); - int minReplicaCount = Math.max(1, autoscaling.minReplicas()); - spec.put("minReplicaCount", minReplicaCount); + spec.put("minReplicaCount", autoscaling.minReplicas()); spec.put("maxReplicaCount", maxReplicas); - if (autoscaling.minReplicas() == 0) { - spec.put("idleReplicaCount", 0); - } spec.put("cooldownPeriod", autoscaling.cooldownSeconds()); spec.put("pollingInterval", getPollingInterval()); spec.put("advanced", getAdvanced(hiveCluster, autoscaling, maxReplicas));