Skip to content
2 changes: 2 additions & 0 deletions helm/blueapi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ A Helm chart deploying a worker pod that runs Bluesky plans
| podAnnotations | object | `{}` | |
| podLabels | object | `{}` | |
| podSecurityContext | object | `{}` | |
| pvcautodeletion.enabled | bool | `true` | |
| readinessProbe | object | `{"failureThreshold":2,"httpGet":{"path":"/healthz","port":"http"},"periodSeconds":10}` | Readiness probe, if configured kubernetes will not route traffic to this pod if failed consecutively. This could allow the service time to recover if it is being overwhelmed by traffic, but without the ability to load balance or scale up/outwards, upstream services will need to know to back off. This is automatically disabled when in debug mode. |
| resources | object | `{"limits":{"cpu":"2000m","memory":"4000Mi"},"requests":{"cpu":"200m","memory":"400Mi"}}` | Sets the compute resources available to the pod. These defaults are appropriate when using debug mode or an internal PVC and therefore running VS Code server in the pod. In the Diamond cluster, requests must be >= 0.1*limits When not using either of the above, the limits may be lowered. When idle but connected, blueapi consumes ~400MB of memory and 1% cpu and may struggle when allocated less. |
| restartOnConfigChange | bool | `true` | If enabled the blueapi pod will restart on changes to `worker` |
Expand All @@ -44,6 +45,7 @@ A Helm chart deploying a worker pod that runs Bluesky plans
| serviceAccount.create | bool | `false` | |
| serviceAccount.name | string | `""` | |
| startupProbe | object | `{"failureThreshold":5,"httpGet":{"path":"/healthz","port":"http"},"periodSeconds":10}` | A more lenient livenessProbe to allow the service to start fully. This is automatically disabled when in debug mode. |
| timeStampCron.enabled | bool | `true` | |
| tolerations | list | `[]` | May be required to run on specific nodes (e.g. the control machine) |
| tracing | object | `{"fastapi":{"excludedURLs":"/healthz"},"otlp":{"enabled":false,"protocol":"http/protobuf","server":{"host":"http://opentelemetry-collector.tracing","port":4318}}}` | Exclude health probe requests from tracing by default to prevent spamming |
| volumeMounts | list | `[{"mountPath":"/config","name":"worker-config","readOnly":true}]` | Additional volumeMounts on the output StatefulSet definition. Define how volumes are mounted to the container referenced by using the same name. |
Expand Down
25 changes: 25 additions & 0 deletions helm/blueapi/files/scripts/pvc-deletion.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/sh
# Delete PersistentVolumeClaims in $RELEASE_NAMESPACE that have gone unused.
#
# A PVC is deleted only when ALL of the following hold:
#   - it has a numeric `last-used` annotation (seconds since the epoch),
#   - that timestamp is more than MAX_UNUSED_SECONDS in the past,
#   - its `protected` annotation is not "true".
# A PVC whose annotations cannot be read is left alone.
#
# Environment:
#   RELEASE_NAMESPACE   required; namespace whose PVCs are examined
#   MAX_UNUSED_SECONDS  optional; retention period. Defaults to three months
#                       (3 * 2628000 s, 2628000 s being one average month).
#
# Exit status: 0 when every PVC was processed; non-zero when the namespace is
# unset, the PVC list could not be fetched, or any lookup or delete failed.
#
# POSIX sh has no `local`, so variables assigned inside functions are global.

# Print the value of annotation $2 on PVC $1.
# kubectl exits 0 and prints nothing when the annotation is absent, so callers
# must test the OUTPUT for emptiness; the exit status only reports API errors.
pvc_annotation() {
  kubectl get pvc "$1" -n "$RELEASE_NAMESPACE" \
    -o=jsonpath="{.metadata.annotations.$2}"
}

main() {
  if [ -z "${RELEASE_NAMESPACE:-}" ]; then
    echo "RELEASE_NAMESPACE must be set" >&2
    return 1
  fi

  max_unused=${MAX_UNUSED_SECONDS:-7884000}
  now=$(date +%s)

  # Captured on its own (not through a pipeline) so a kubectl failure is seen.
  all_pvcs=$(kubectl get pvc -n "$RELEASE_NAMESPACE" \
    -o=jsonpath='{.items[*].metadata.name}') || {
    echo "Failed to list PVCs in namespace $RELEASE_NAMESPACE" >&2
    return 1
  }

  rc=0
  # Word splitting is intended: the list is space separated and PVC names
  # cannot contain whitespace or glob characters.
  for pvc in $all_pvcs; do
    last_used=$(pvc_annotation "$pvc" last-used) || {
      echo "Could not read last-used annotation of PVC $pvc, skipping deletion" >&2
      rc=1
      continue
    }

    case "$last_used" in
      '')
        echo "PVC $pvc does not have last-used annotation, skipping deletion"
        continue
        ;;
      *[!0-9]*)
        echo "PVC $pvc has a non-numeric last-used annotation, skipping deletion" >&2
        continue
        ;;
    esac

    # Still within the retention period: nothing to do.
    [ $((now - last_used)) -gt "$max_unused" ] || continue

    # Fail safe: if the protection flag cannot be read, keep the PVC.
    protected=$(pvc_annotation "$pvc" protected) || {
      echo "Could not read protected annotation of PVC $pvc, skipping deletion" >&2
      rc=1
      continue
    }
    if [ "$protected" = "true" ]; then
      echo "PVC $pvc is protected, skipping deletion"
      continue
    fi

    # Unused for longer than the retention period and not protected.
    kubectl delete pvc "$pvc" -n "$RELEASE_NAMESPACE" || rc=1
  done

  return "$rc"
}

main "$@"
9 changes: 9 additions & 0 deletions helm/blueapi/files/scripts/time-stamper.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/sh
# Stamp every PVC referenced by a pod in $RELEASE_NAMESPACE with a
# `last-used=<seconds since the epoch>` annotation.
#
# Pods are not filtered by phase: any pod returned by `kubectl get pods` that
# declares a persistentVolumeClaim volume counts as a user of that claim.
#
# Environment:
#   RELEASE_NAMESPACE  required; namespace whose pods and PVCs are examined
#
# Exit status: 0 when every referenced PVC was annotated; non-zero when the
# namespace is unset, the pod list could not be fetched, or any annotate call
# failed (the remaining PVCs are still attempted).
#
# POSIX sh has no `local`, so variables assigned inside main are global.

main() {
  if [ -z "${RELEASE_NAMESPACE:-}" ]; then
    echo "RELEASE_NAMESPACE must be set" >&2
    return 1
  fi

  # Captured on its own (not through a pipeline) so a kubectl failure is seen.
  mounted_pvcs=$(kubectl get pods -n "$RELEASE_NAMESPACE" \
    -o=jsonpath='{.items[*].spec.volumes[*].persistentVolumeClaim.claimName}') || {
    echo "Failed to list pods in namespace $RELEASE_NAMESPACE" >&2
    return 1
  }
  # Several pods may share one claim; annotate each claim only once.
  mounted_pvcs=$(printf '%s\n' "$mounted_pvcs" | tr ' ' '\n' | sort -u)

  now=$(date +%s)
  rc=0
  # Word splitting is intended: PVC names cannot contain whitespace or glob
  # characters.
  for pvc in $mounted_pvcs; do
    kubectl annotate --overwrite pvc "$pvc" -n "$RELEASE_NAMESPACE" \
      last-used="$now" || rc=1
  done

  return "$rc"
}

main "$@"
24 changes: 24 additions & 0 deletions helm/blueapi/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,30 @@ data:
init_config.yaml: |-
scratch:
{{- toYaml .Values.worker.scratch | nindent 6 }}

---
{{- end }}

---
{{- if .Values.timeStampCron.enabled }}
# ConfigMap that ships files/scripts/time-stamper.sh under the key
# time-stamper.sh. Rendered only when timeStampCron.enabled is set.
# The script text is read from the chart's files and indented by 4 so that it
# sits inside the block scalar below.
apiVersion: v1
kind: ConfigMap
metadata:
name : {{include "blueapi.fullname" . }}-pvc-stamper-script
data:
{{- $files := .Files }}
time-stamper.sh: |-
{{ $files.Get "files/scripts/time-stamper.sh" | indent 4 }}
{{- end }}

---
{{- if .Values.pvcautodeletion.enabled }}
# ConfigMap that ships files/scripts/pvc-deletion.sh under the key
# pvc-deletion.sh. Rendered only when pvcautodeletion.enabled is set.
# The script text is read from the chart's files and indented by 4 so that it
# sits inside the block scalar below.
apiVersion: v1
kind: ConfigMap
metadata:
name : {{include "blueapi.fullname" . }}-pvc-autodeletion-script
data:
{{- $files := .Files }}
pvc-deletion.sh: |-
{{ $files.Get "files/scripts/pvc-deletion.sh" | indent 4 }}
{{- end }}
159 changes: 159 additions & 0 deletions helm/blueapi/templates/cronjob.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
{{- if .Values.timeStampCron.enabled }}
# ServiceAccount named by serviceAccountName in the last-used-stamper CronJob.
# Its token is auto-mounted; presumably this is what lets kubectl inside the
# job pod authenticate to the API server.
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "blueapi.fullname" . }}-last-used-stamper
namespace: {{ .Release.Namespace }}
automountServiceAccountToken: true
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: {{ include "blueapi.fullname" . }}-last-used-stamper
  namespace: {{ .Release.Namespace }}
# Least privilege for files/scripts/time-stamper.sh:
#   - list pods, to read the PVC claim names out of their volumes
#   - get and patch PVCs, which is what kubectl annotate --overwrite performs
rules:
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["list"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "patch"]
---
# Binds the last-used-stamper Role to the ServiceAccount of the same name in
# the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: {{ include "blueapi.fullname" . }}-last-used-stamper
namespace: {{ .Release.Namespace }}
subjects:
- kind: ServiceAccount
name: {{ include "blueapi.fullname" . }}-last-used-stamper
namespace: {{ .Release.Namespace }}
roleRef:
kind: Role
name: {{ include "blueapi.fullname" . }}-last-used-stamper
apiGroup: rbac.authorization.k8s.io
---
# Every 5 minutes, runs /scripts/time-stamper.sh (mounted from the
# pvc-stamper-script ConfigMap) to annotate PVCs referenced by pods in the
# release namespace with a last-used timestamp.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: {{ include "blueapi.fullname" . }}-last-used-stamper
  namespace: {{ .Release.Namespace }}
spec:
  # Never start a new run while the previous one is still active
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 1
  schedule: "*/5 * * * *"
  jobTemplate:
    spec:
      # Number of retries before the Job is marked as failed
      backoffLimit: 3
      # The Job is terminated if it is still running after 60 seconds
      activeDeadlineSeconds: 60
      template:
        spec:
          serviceAccountName: {{ include "blueapi.fullname" . }}-last-used-stamper
          {{- with .Values.tolerations }}
          tolerations:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          volumes:
            # Fixed, short volume name: volume names are limited to 63
            # characters, which a release-derived prefix plus suffix may exceed.
            - name: pvc-stamper-script
              configMap:
                name: {{ include "blueapi.fullname" . }}-pvc-stamper-script
                # Read and execute for everyone, so the script can be run directly
                defaultMode: 0555
          containers:
            - name: last-used-stamper
              # NOTE(review): a floating "latest" tag combined with IfNotPresent
              # means each node keeps whichever build it pulled first; consider
              # pinning a kubectl version that matches the cluster.
              image: bitnami/kubectl:latest
              imagePullPolicy: IfNotPresent
              command: ["/scripts/time-stamper.sh"]
              env:
                # Quoted so that an all-digit release name or namespace is still
                # rendered as a string, which env values must be.
                - name: RELEASE_NAME
                  value: {{ .Release.Name | quote }}
                - name: RELEASE_NAMESPACE
                  value: {{ .Release.Namespace | quote }}
              volumeMounts:
                - name: pvc-stamper-script
                  mountPath: /scripts
          restartPolicy: OnFailure
{{- end }}
{{- if .Values.pvcautodeletion.enabled }}
---
# ServiceAccount named by serviceAccountName in the pvcautodeletion CronJob.
# Its token is auto-mounted; presumably this is what lets kubectl inside the
# job pod authenticate to the API server.
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "blueapi.fullname" . }}-pvcautodeletion
namespace: {{ .Release.Namespace }}
automountServiceAccountToken: true
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: {{ include "blueapi.fullname" . }}-pvcautodeletion
  namespace: {{ .Release.Namespace }}
# Least privilege for files/scripts/pvc-deletion.sh, which only lists PVCs,
# reads their annotations and deletes them. It never touches pods and never
# patches anything.
rules:
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "delete"]
---
# Binds the pvcautodeletion Role to the ServiceAccount of the same name in the
# release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: {{ include "blueapi.fullname" . }}-pvcautodeletion
namespace: {{ .Release.Namespace }}
subjects:
- kind: ServiceAccount
name: {{ include "blueapi.fullname" . }}-pvcautodeletion
namespace: {{ .Release.Namespace }}
roleRef:
kind: Role
name: {{ include "blueapi.fullname" . }}-pvcautodeletion
apiGroup: rbac.authorization.k8s.io
---
# Once a week, runs /scripts/pvc-deletion.sh (mounted from the
# pvc-autodeletion-script ConfigMap) to delete PVCs in the release namespace
# that the script considers unused.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: {{ include "blueapi.fullname" . }}-pvcautodeletion
  namespace: {{ .Release.Namespace }}
spec:
  # Never start a new run while the previous one is still active
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 1
  schedule: "@weekly"
  jobTemplate:
    spec:
      # Number of retries before the Job is marked as failed
      backoffLimit: 3
      # The Job is terminated if it is still running after 300 seconds
      activeDeadlineSeconds: 300
      template:
        spec:
          serviceAccountName: {{ include "blueapi.fullname" . }}-pvcautodeletion
          {{- with .Values.tolerations }}
          tolerations:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          volumes:
            # Fixed, short volume name: volume names are limited to 63
            # characters, which a release-derived prefix plus suffix may exceed.
            - name: pvc-autodeletion-script
              configMap:
                name: {{ include "blueapi.fullname" . }}-pvc-autodeletion-script
                # Read and execute for everyone, so the script can be run directly
                defaultMode: 0555
          containers:
            - name: pvcautodeletion
              # NOTE(review): a floating "latest" tag combined with IfNotPresent
              # means each node keeps whichever build it pulled first; consider
              # pinning a kubectl version that matches the cluster.
              image: bitnami/kubectl:latest
              imagePullPolicy: IfNotPresent
              command: ["/scripts/pvc-deletion.sh"]
              env:
                # Quoted so that an all-digit release name or namespace is still
                # rendered as a string, which env values must be.
                - name: RELEASE_NAME
                  value: {{ .Release.Name | quote }}
                - name: RELEASE_NAMESPACE
                  value: {{ .Release.Namespace | quote }}
              volumeMounts:
                - name: pvc-autodeletion-script
                  mountPath: /scripts
          restartPolicy: OnFailure
{{- end }}
16 changes: 16 additions & 0 deletions helm/blueapi/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,14 @@
"podSecurityContext": {
"type": "object"
},
"pvcautodeletion": {
"type": "object",
"properties": {
"enabled": {
"type": "boolean"
}
}
},
"readinessProbe": {
"description": "Readiness probe, if configured kubernetes will not route traffic to this pod if failed consecutively. This could allow the service time to recover if it is being overwhelmed by traffic, but without the to ability to load balance or scale up/outwards, upstream services will need to know to back off. This is automatically disabled when in debug mode.",
"type": "object",
Expand Down Expand Up @@ -292,6 +300,14 @@
}
}
},
"timeStampCron": {
"type": "object",
"properties": {
"enabled": {
"type": "boolean"
}
}
},
"tolerations": {
"description": "May be required to run on specific nodes (e.g. the control machine)",
"type": "array"
Expand Down
6 changes: 6 additions & 0 deletions helm/blueapi/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,12 @@ initContainer:
# -- Size of persistent volume
size: "1Gi"

timeStampCron:
# -- If enabled, deploys a CronJob that every 5 minutes annotates each PVC referenced by a pod in the release namespace with a `last-used` timestamp
enabled: true

pvcautodeletion:
# -- If enabled, deploys a weekly CronJob that deletes PVCs in the release namespace judged unused from their `last-used` annotation (written by the timeStampCron job); PVCs annotated `protected: "true"` are skipped
enabled: true

debug:
# -- If enabled, runs debugpy, allowing port-forwarding to expose port 5678 or attached vscode instance
enabled: false
Expand Down
Loading