diff --git a/helm/smooai-next/Chart.yaml b/helm/smooai-next/Chart.yaml
index f1a956f..745f62b 100644
--- a/helm/smooai-next/Chart.yaml
+++ b/helm/smooai-next/Chart.yaml
@@ -23,7 +23,11 @@ type: application
 # (SMOODEV-1790 — dogfooding the chart for smooai apps/web).
 # 0.2.1: PDB sets unhealthyPodEvictionPolicy (default AlwaysAllow) — kube-linter
 # flags its absence; matches the prior hand-rolled PDB.
-version: 0.2.1
+# 0.2.2: expose hpa.memoryTarget — optional second HPA Resource metric (memory
+# averageUtilization) so memory-pressured Next.js pods scale out instead
+# of riding into a V8 heap-limit OOM (SMOODEV-1965). Default 0 = CPU-only,
+# backward-compatible.
+version: 0.2.2
 # Tracks the consuming app image version (override via image.tag).
 appVersion: "0.1.0"
 home: https://github.com/SmooAI/deploy
diff --git a/helm/smooai-next/README.md b/helm/smooai-next/README.md
index d02a50c..871dbc3 100644
--- a/helm/smooai-next/README.md
+++ b/helm/smooai-next/README.md
@@ -28,7 +28,7 @@ helm/smooai-next/
 │ ├── _helpers.tpl
 │ ├── deployment.yaml # node server.js; graceful shutdown; /api/health probes; S3 cache env
 │ ├── service.yaml # ClusterIP, port → http (3000)
-│ ├── hpa.yaml # HPA (min/max/cpuTarget)
+│ ├── hpa.yaml # HPA (min/max/cpuTarget; optional memoryTarget)
 │ ├── pdb.yaml # PodDisruptionBudget (minAvailable)
 │ ├── serviceaccount.yaml # IRSA-annotated ServiceAccount
 │ ├── ingress.yaml # dedicated internet-facing ALB = the CloudFront origin
diff --git a/helm/smooai-next/templates/hpa.yaml b/helm/smooai-next/templates/hpa.yaml
index 8618463..6f2d4c4 100644
--- a/helm/smooai-next/templates/hpa.yaml
+++ b/helm/smooai-next/templates/hpa.yaml
@@ -19,6 +19,19 @@ spec:
         target:
           type: Utilization
           averageUtilization: {{ .Values.hpa.cpuTarget }}
+    {{- with .Values.hpa.memoryTarget }}
+    # Optional memory-based scale-out (averageUtilization is % of the memory
+    # REQUEST). The HPA scales UP when EITHER metric is hot and DOWN only when
+    # BOTH are cool — so memory-pressured Next.js pods add replicas instead of
+    # riding individual pods into a V8 heap-limit OOM. `with` skips this when
+    # memoryTarget is unset/0, keeping the default CPU-only behavior unchanged.
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: {{ . }}
+    {{- end }}
   {{- with .Values.hpa.behavior }}
   # Optional v2 scaling behavior (scaleUp/scaleDown stabilization + policies).
   behavior:
diff --git a/helm/smooai-next/values.yaml b/helm/smooai-next/values.yaml
index 0b1737e..7bbc041 100644
--- a/helm/smooai-next/values.yaml
+++ b/helm/smooai-next/values.yaml
@@ -45,6 +45,14 @@ hpa:
   min: 2
   max: 10
   cpuTarget: 70
+  # Optional memory-based scale-out target — averageUtilization as a % of the
+  # memory REQUEST (resources.requests.memory below). Unset/0 = CPU-only (the
+  # default; backward-compatible). Set e.g. 80 to add a second HPA metric so a
+  # memory-pressured Next.js workload scales out instead of riding each pod into
+  # a V8 heap-limit OOM. The HPA scales UP when EITHER cpu or memory is hot and
+  # DOWN only when BOTH are cool, so set this comfortably ABOVE steady-state
+  # (memory-used ÷ memory-request) or it will pin the deployment at max.
+  memoryTarget: 0
   # Optional autoscaling/v2 scaling behavior (scaleUp/scaleDown stabilization
   # windows + policies). Empty = the autoscaler's built-in defaults. Example:
   # behavior: