Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions cmd/thv-operator/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ var (
setupLog = log.Log.WithName("setup")
)

// envEnableStorageVersionMigrator is the opt-in for the StorageVersionMigrator
// controller. The controller defaults to OFF in this release so the change can
// ship safely without functional impact. Set to "true" (or "1", "t") to enable.
// A follow-up release will flip the default to true alongside the helm chart
// surface and user docs.
// envEnableStorageVersionMigrator gates the StorageVersionMigrator controller.
// The binary itself defaults to OFF when the var is unset; the operator helm
// chart sets it to "true" by default, so chart-based installs run the migrator
// unless the admin explicitly opts out. Set to "true" (or "1", "t") to
// enable, "false" to disable.
const envEnableStorageVersionMigrator = "TOOLHIVE_ENABLE_STORAGE_VERSION_MIGRATOR"

func init() {
Expand Down Expand Up @@ -197,10 +197,10 @@ func setupStorageVersionMigrator(mgr ctrl.Manager) error {
}

// isStorageVersionMigratorEnabled reports whether the StorageVersionMigrator
// controller should be registered. Defaults to false in this release — admins
// must explicitly opt in via TOOLHIVE_ENABLE_STORAGE_VERSION_MIGRATOR=true.
// An unparsable value returns an error so startup fails loudly rather than
// silently disabling the feature an admin asked to turn on.
// controller should be registered. Defaults to false when
// TOOLHIVE_ENABLE_STORAGE_VERSION_MIGRATOR is unset; the operator helm chart
// sets it to "true" by default. An unparsable value returns an error so startup
// fails loudly rather than silently disabling the feature an admin asked to turn on.
func isStorageVersionMigratorEnabled() (bool, error) {
value, found := os.LookupEnv(envEnableStorageVersionMigrator)
if !found {
Expand Down
4 changes: 2 additions & 2 deletions deploy/charts/operator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ The command removes all the Kubernetes components associated with the chart and
|-----|------|---------|-------------|
| fullnameOverride | string | `"toolhive-operator"` | Provide a fully-qualified name override for resources |
| nameOverride | string | `""` | Override the name of the chart |
| operator | object | `{"affinity":{},"autoscaling":{"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80},"containerSecurityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"runAsUser":1000,"seccompProfile":{"type":"RuntimeDefault"}},"defaultImagePullSecrets":[],"defaultRedis":{"addr":"","existingSecret":"","existingSecretKey":""},"env":[],"features":{"experimental":false,"storageVersionMigrator":false},"gc":{"gogc":75,"gomemlimit":"110MiB"},"image":"ghcr.io/stacklok/toolhive/operator:v0.30.0","imagePullPolicy":"IfNotPresent","imagePullSecrets":[],"leaderElectionRole":{"binding":{"name":"toolhive-operator-leader-election-rolebinding"},"name":"toolhive-operator-leader-election-role","rules":[{"apiGroups":[""],"resources":["configmaps"],"verbs":["get","list","watch","create","update","patch","delete"]},{"apiGroups":["coordination.k8s.io"],"resources":["leases"],"verbs":["get","list","watch","create","update","patch","delete"]},{"apiGroups":["events.k8s.io"],"resources":["events"],"verbs":["create","patch"]}]},"livenessProbe":{"httpGet":{"path":"/healthz","port":"health"},"initialDelaySeconds":15,"periodSeconds":20},"nodeSelector":{},"podAnnotations":{},"podLabels":{},"podSecurityContext":{"runAsNonRoot":true},"ports":[{"containerPort":8080,"name":"metrics","protocol":"TCP"},{"containerPort":8081,"name":"health","protocol":"TCP"}],"proxyHost":"0.0.0.0","rbac":{"allowedNamespaces":[],"scope":"cluster"},"readinessProbe":{"httpGet":{"path":"/readyz","port":"health"},"initialDelaySeconds":5,"periodSeconds":10},"replicaCount":1,"resources":{"limits":{"cpu":"500m","memory":"128Mi"},"requests":{"cpu":"10m","memory":"64Mi"}},"serviceAccount":{"annotations":{},"automountServiceAccountToken":true,"create":true,"labels":{},"name":"toolhive-operator"},"tolerations":[],"toolhiveRunnerImage":"ghcr.io/stacklok/toolhive/proxyrunner:v0.30.0","vmcpImage":"ghcr.io/stacklok/toolhi
ve/vmcp:v0.30.0","volumeMounts":[],"volumes":[]}` | All values for the operator deployment and associated resources |
| operator | object | `{"affinity":{},"autoscaling":{"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80},"containerSecurityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"runAsUser":1000,"seccompProfile":{"type":"RuntimeDefault"}},"defaultImagePullSecrets":[],"defaultRedis":{"addr":"","existingSecret":"","existingSecretKey":""},"env":[],"features":{"experimental":false,"storageVersionMigrator":true},"gc":{"gogc":75,"gomemlimit":"110MiB"},"image":"ghcr.io/stacklok/toolhive/operator:v0.30.0","imagePullPolicy":"IfNotPresent","imagePullSecrets":[],"leaderElectionRole":{"binding":{"name":"toolhive-operator-leader-election-rolebinding"},"name":"toolhive-operator-leader-election-role","rules":[{"apiGroups":[""],"resources":["configmaps"],"verbs":["get","list","watch","create","update","patch","delete"]},{"apiGroups":["coordination.k8s.io"],"resources":["leases"],"verbs":["get","list","watch","create","update","patch","delete"]},{"apiGroups":["events.k8s.io"],"resources":["events"],"verbs":["create","patch"]}]},"livenessProbe":{"httpGet":{"path":"/healthz","port":"health"},"initialDelaySeconds":15,"periodSeconds":20},"nodeSelector":{},"podAnnotations":{},"podLabels":{},"podSecurityContext":{"runAsNonRoot":true},"ports":[{"containerPort":8080,"name":"metrics","protocol":"TCP"},{"containerPort":8081,"name":"health","protocol":"TCP"}],"proxyHost":"0.0.0.0","rbac":{"allowedNamespaces":[],"scope":"cluster"},"readinessProbe":{"httpGet":{"path":"/readyz","port":"health"},"initialDelaySeconds":5,"periodSeconds":10},"replicaCount":1,"resources":{"limits":{"cpu":"500m","memory":"128Mi"},"requests":{"cpu":"10m","memory":"64Mi"}},"serviceAccount":{"annotations":{},"automountServiceAccountToken":true,"create":true,"labels":{},"name":"toolhive-operator"},"tolerations":[],"toolhiveRunnerImage":"ghcr.io/stacklok/toolhive/proxyrunner:v0.30.0","vmcpImage":"ghcr.io/stacklok/toolhiv
e/vmcp:v0.30.0","volumeMounts":[],"volumes":[]}` | All values for the operator deployment and associated resources |
| operator.affinity | object | `{}` | Affinity settings for the operator pod |
| operator.autoscaling | object | `{"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80}` | Configuration for horizontal pod autoscaling |
| operator.autoscaling.enabled | bool | `false` | Enable autoscaling for the operator |
Expand All @@ -61,7 +61,7 @@ The command removes all the Kubernetes components associated with the chart and
| operator.defaultRedis.existingSecretKey | string | `""` | existingSecretKey is the key within existingSecret that holds the password. Empty means use global.redis.existingSecretKey or fall back to "redis-password". |
| operator.env | list | `[]` | Environment variables to set in the operator container. Supported toolhive-specific variables include: - TOOLHIVE_SKIP_UPDATE_CHECK: set to "true" to disable the operator's periodic update check against the ToolHive update API. Also disables the usage-metrics collection that is gated on the same check. |
| operator.features.experimental | bool | `false` | Enable experimental features |
| operator.features.storageVersionMigrator | bool | `false` | Enable the StorageVersionMigrator controller, which auto-cleans status.storedVersions on opted-in toolhive.stacklok.dev CRDs so a future release can drop deprecated versions (e.g. v1alpha1) without orphaning etcd objects in the cluster. Sets TOOLHIVE_ENABLE_STORAGE_VERSION_MIGRATOR in the operator deployment. |
| operator.features.storageVersionMigrator | bool | `true` | Enable the StorageVersionMigrator controller, which auto-cleans status.storedVersions on opted-in toolhive.stacklok.dev CRDs so a future release can drop deprecated versions (e.g. v1alpha1) without orphaning etcd objects in the cluster. Enabled by default; set to false to opt out and handle storage-version cleanup yourself. Sets TOOLHIVE_ENABLE_STORAGE_VERSION_MIGRATOR in the operator deployment. Requires `operator.rbac.scope=cluster` — the controller watches cluster-scoped CRDs and re-stores resources across all namespaces, so the chart rejects this being true when scope is namespace. |
| operator.gc | object | `{"gogc":75,"gomemlimit":"110MiB"}` | Go memory limits and garbage collection percentage for the operator container |
| operator.gc.gogc | int | `75` | Go garbage collection percentage for the operator container |
| operator.gc.gomemlimit | string | `"110MiB"` | Go memory limits for the operator container |
Expand Down
22 changes: 21 additions & 1 deletion deploy/charts/operator/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,24 @@ Common labels for the toolhive resources
{{- define "toolhive.labels" -}}
app: toolhive
app.kubernetes.io/name: toolhive
{{- end }}
{{- end }}

{{/*
Validate feature-flag / RBAC-scope combinations and fail the render early with
an actionable message rather than deploying a wedged operator.

Include this template with the root context at the top of a manifest. Every
action in it trims surrounding whitespace, so it renders nothing when the
values are valid; when they are not, it aborts the whole render via `fail`.

The StorageVersionMigrator controller watches cluster-scoped
CustomResourceDefinition objects and re-stores custom resources across every
namespace, so it requires cluster-scoped RBAC and a cluster-scoped manager
cache. A namespace-scoped operator (operator.rbac.scope=namespace) gets only
per-namespace RoleBindings and a namespace-restricted cache, so the controller
cannot sync its CRD informer and would prevent the manager from starting. We
fail loudly here instead of silently dropping the feature, because a silent
drop would let a namespace-scoped admin believe storedVersions are being kept
clean when they are not.

Note that the check rejects any operator.rbac.scope value other than "cluster"
while the feature is enabled, not only the literal "namespace".
*/}}
{{- define "toolhive-operator.validateStorageVersionMigrator" -}}
{{- if and .Values.operator.features.storageVersionMigrator (ne .Values.operator.rbac.scope "cluster") -}}
{{- fail "operator.features.storageVersionMigrator requires operator.rbac.scope=cluster: the StorageVersionMigrator controller watches cluster-scoped CustomResourceDefinitions and re-stores resources across all namespaces, which a namespace-scoped operator cannot do. Set operator.features.storageVersionMigrator=false for namespace-scoped installs." -}}
{{- end -}}
{{- end -}}
1 change: 1 addition & 0 deletions deploy/charts/operator/templates/deployment.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- include "toolhive-operator.validateStorageVersionMigrator" . }}
apiVersion: apps/v1
kind: Deployment
metadata:
Expand Down
2 changes: 1 addition & 1 deletion deploy/charts/operator/tests/default_install_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ tests:
template: deployment.yaml
asserts:
- contains: { path: 'spec.template.spec.containers[0].env', content: { name: ENABLE_EXPERIMENTAL_FEATURES, value: "false" } }
- contains: { path: 'spec.template.spec.containers[0].env', content: { name: TOOLHIVE_ENABLE_STORAGE_VERSION_MIGRATOR, value: "false" } }
- contains: { path: 'spec.template.spec.containers[0].env', content: { name: TOOLHIVE_ENABLE_STORAGE_VERSION_MIGRATOR, value: "true" } }
- contains: { path: 'spec.template.spec.containers[0].env', content: { name: UNSTRUCTURED_LOGS, value: "false" } }
- contains: { path: 'spec.template.spec.containers[0].env', content: { name: TOOLHIVE_USE_CONFIGMAP, value: "true" } }
- contains: { path: 'spec.template.spec.containers[0].env', content: { name: GOGC, value: "75" } }
Expand Down
49 changes: 44 additions & 5 deletions deploy/charts/operator/tests/feature_flags_test.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,58 @@
suite: opt-in feature flags
# The experimental and storage-version-migrator features are quoted-boolean env
# vars. A dropped quote or a renamed var silently disables the feature, so pin
# both the enabled and (in the baseline suite) disabled states.
set:
operator.features.experimental: true
operator.features.storageVersionMigrator: true
# vars. A dropped quote or a renamed var silently flips the feature, so pin both
# the enabled and disabled states. storageVersionMigrator defaults to true, so
# the disabled state is the one the baseline suite can't cover — pin it here.
# The migrator also requires cluster-scoped RBAC, so the chart rejects it in
# namespace scope; pin that validation too.
templates:
- deployment.yaml
tests:
- it: flips both feature-flag env vars to the quoted string "true"
template: deployment.yaml
set:
operator.features.experimental: true
operator.features.storageVersionMigrator: true
asserts:
- contains:
path: 'spec.template.spec.containers[0].env'
content: { name: ENABLE_EXPERIMENTAL_FEATURES, value: "true" }
- contains:
path: 'spec.template.spec.containers[0].env'
content: { name: TOOLHIVE_ENABLE_STORAGE_VERSION_MIGRATOR, value: "true" }

- it: opts out of the storage version migrator when set to false
template: deployment.yaml
set:
operator.features.storageVersionMigrator: false
asserts:
- contains:
path: 'spec.template.spec.containers[0].env'
content: { name: TOOLHIVE_ENABLE_STORAGE_VERSION_MIGRATOR, value: "false" }

# The migrator watches cluster-scoped CRDs, so it must never be enabled in a
# namespace-scoped install. The chart fails the render rather than deploying a
# wedged operator. Pin both the rejection and the supported opt-out.
- it: rejects the storage version migrator in namespace scope
template: deployment.yaml
set:
operator.rbac.scope: namespace
operator.rbac.allowedNamespaces: [toolhive-system]
operator.features.storageVersionMigrator: true
asserts:
- failedTemplate:
errorPattern: "operator.features.storageVersionMigrator requires operator.rbac.scope=cluster"

- it: allows namespace scope when the storage version migrator is disabled
template: deployment.yaml
set:
operator.rbac.scope: namespace
operator.rbac.allowedNamespaces: [toolhive-system]
operator.features.storageVersionMigrator: false
asserts:
- contains:
path: 'spec.template.spec.containers[0].env'
content: { name: TOOLHIVE_ENABLE_STORAGE_VERSION_MIGRATOR, value: "false" }
- contains:
path: 'spec.template.spec.containers[0].env'
content: { name: WATCH_NAMESPACE, value: "toolhive-system" }
3 changes: 3 additions & 0 deletions deploy/charts/operator/tests/namespace_scope_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ suite: namespace-scoped install
set:
operator.rbac.scope: namespace
operator.rbac.allowedNamespaces: [team-a, team-b]
# storageVersionMigrator requires cluster scope; the chart rejects it under
# namespace scope (see feature_flags_test.yaml), so disable it here.
operator.features.storageVersionMigrator: false
templates:
- deployment.yaml
- clusterrole/rolebinding.yaml
Expand Down
8 changes: 6 additions & 2 deletions deploy/charts/operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,13 @@ operator:
# -- Enable the StorageVersionMigrator controller, which auto-cleans
# status.storedVersions on opted-in toolhive.stacklok.dev CRDs so a
# future release can drop deprecated versions (e.g. v1alpha1) without
# orphaning etcd objects in the cluster. Sets
# orphaning etcd objects in the cluster. Enabled by default; set to
# false to opt out and handle storage-version cleanup yourself. Sets
# TOOLHIVE_ENABLE_STORAGE_VERSION_MIGRATOR in the operator deployment.
storageVersionMigrator: false
# Requires `operator.rbac.scope=cluster` — the controller watches
# cluster-scoped CRDs and re-stores resources across all namespaces, so
# the chart rejects this being true when scope is namespace.
storageVersionMigrator: true
# -- Number of replicas for the operator deployment
replicaCount: 1

Expand Down
Loading