From d1e2f4878679e9ca2bbde04b626b5c9cf275e629 Mon Sep 17 00:00:00 2001 From: ProcessEngine Bot Date: Tue, 20 May 2025 07:28:48 +0000 Subject: [PATCH 01/58] Release v1.7.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Changelog v1.7.0 (20.05.2025) Dieser Changelog deckt die Änderungen zwischen folgenden Versionen ab: [v1.6.1 und v1.7.0](https://github.com/5minds/ProcessCube.Deployment/compare/v1.6.1...v1.7.0). Weitere Hinweise befinden sich im Changelog der vorherigen Version: [v1.6.1](https://github.com/5minds/ProcessCube.Deployment/releases/tag/v1.6.1). ## Merged Pull Requests - none [skip ci] From 72fdea3bfdaeaf0f7e7a43c0e137f008da298047 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Tue, 20 May 2025 09:28:46 +0200 Subject: [PATCH 02/58] bump version to 1.8.0 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index c3f46c4..d435aa1 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,5 @@ { "name": "processcube_deployment", - "version": "1.7.0", + "version": "1.8.0", "description": "Deployment Repository of ProcessCube" } From 822606570db4f37f5c363f043cb8f3d93aa366bc Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Tue, 20 May 2025 09:30:27 +0200 Subject: [PATCH 03/58] fix postgres tag --- sample/base/kustomization.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sample/base/kustomization.yaml b/sample/base/kustomization.yaml index bc181fd..2e62ef9 100644 --- a/sample/base/kustomization.yaml +++ b/sample/base/kustomization.yaml @@ -15,4 +15,4 @@ images: - name: ghcr.io/5minds/processcube_lowcode newTag: 6.1.1 - name: postgres - newTag: 17 \ No newline at end of file + newTag: "17" \ No newline at end of file From 34a531688b86063dce23f483645cd65e790abd48 Mon Sep 17 00:00:00 2001 From: ProcessEngine Bot Date: Tue, 20 May 2025 07:31:02 +0000 Subject: [PATCH 04/58] Release v1.8.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit # Changelog v1.8.0 (20.05.2025) Dieser Changelog deckt die Änderungen zwischen folgenden Versionen ab: [v1.7.0 und v1.8.0](https://github.com/5minds/ProcessCube.Deployment/compare/v1.7.0...v1.8.0). Weitere Hinweise befinden sich im Changelog der vorherigen Version: [v1.7.0](https://github.com/5minds/ProcessCube.Deployment/releases/tag/v1.7.0). ## Merged Pull Requests - none [skip ci] From 69646abac1f836a240ba09f162fd9aafbfd99a65 Mon Sep 17 00:00:00 2001 From: ProcessEngine Bot Date: Tue, 20 May 2025 09:22:00 +0000 Subject: [PATCH 05/58] Release v1.8.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Changelog v1.8.1 (20.05.2025) Dieser Changelog deckt die Änderungen zwischen folgenden Versionen ab: [v1.8.0 und v1.8.1](https://github.com/5minds/ProcessCube.Deployment/compare/v1.8.0...v1.8.1). Weitere Hinweise befinden sich im Changelog der vorherigen Version: [v1.8.0](https://github.com/5minds/ProcessCube.Deployment/releases/tag/v1.8.0). ## Merged Pull Requests - none [skip ci] From 3781f21d616fa1ab66b688d1af9081197f7250dd Mon Sep 17 00:00:00 2001 From: ProcessEngine Bot Date: Tue, 20 May 2025 09:48:42 +0000 Subject: [PATCH 06/58] Release v1.8.2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Changelog v1.8.2 (20.05.2025) Dieser Changelog deckt die Änderungen zwischen folgenden Versionen ab: [v1.8.1 und v1.8.2](https://github.com/5minds/ProcessCube.Deployment/compare/v1.8.1...v1.8.2). Weitere Hinweise befinden sich im Changelog der vorherigen Version: [v1.8.1](https://github.com/5minds/ProcessCube.Deployment/releases/tag/v1.8.1). 
## Merged Pull Requests - none [skip ci] From f6e9af6cf7fc9508e0d787e6b6dabc936f905188 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Fri, 4 Jul 2025 09:43:17 +0200 Subject: [PATCH 07/58] update versions --- base/artifactShipper/kustomization.yaml | 1 - sample/base/kustomization.yaml | 4 ++-- .../dev}/artifactShipper/github-token-external-secret.yaml | 0 sample/overlays/dev/artifactShipper/kustomization.yaml | 5 +++++ 4 files changed, 7 insertions(+), 3 deletions(-) rename {base => sample/overlays/dev}/artifactShipper/github-token-external-secret.yaml (100%) create mode 100644 sample/overlays/dev/artifactShipper/kustomization.yaml diff --git a/base/artifactShipper/kustomization.yaml b/base/artifactShipper/kustomization.yaml index 826be91..925306a 100644 --- a/base/artifactShipper/kustomization.yaml +++ b/base/artifactShipper/kustomization.yaml @@ -2,5 +2,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - - github-token-external-secret.yaml - job.yaml \ No newline at end of file diff --git a/sample/base/kustomization.yaml b/sample/base/kustomization.yaml index c638de5..8f58836 100644 --- a/sample/base/kustomization.yaml +++ b/sample/base/kustomization.yaml @@ -11,8 +11,8 @@ images: - name: ghcr.io/5minds/processcube_authority newTag: 3.2.0 - name: ghcr.io/5minds/processcube_engine - newTag: 19.0.0-extensions-2.3.0 + newTag: 19.2.1-extensions-2.3.0 - name: ghcr.io/5minds/processcube_lowcode - newTag: 6.1.1 + newTag: 7.0.2 - name: postgres newTag: "17" \ No newline at end of file diff --git a/base/artifactShipper/github-token-external-secret.yaml b/sample/overlays/dev/artifactShipper/github-token-external-secret.yaml similarity index 100% rename from base/artifactShipper/github-token-external-secret.yaml rename to sample/overlays/dev/artifactShipper/github-token-external-secret.yaml diff --git a/sample/overlays/dev/artifactShipper/kustomization.yaml b/sample/overlays/dev/artifactShipper/kustomization.yaml new file mode 100644 index 
0000000..90c59ea --- /dev/null +++ b/sample/overlays/dev/artifactShipper/kustomization.yaml @@ -0,0 +1,5 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - github-token-external-secret.yaml \ No newline at end of file From 332276cd592b429f7d5919a3b358560b726fc18c Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Fri, 4 Jul 2025 09:44:58 +0200 Subject: [PATCH 08/58] bump version to 1.9.0 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index b068be0..6329617 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,5 @@ { "name": "processcube_deployment", - "version": "1.8.2", + "version": "1.9.0", "description": "Deployment Repository of ProcessCube" } From 4ef5efd2b200579049d5da27535c47bc76276d65 Mon Sep 17 00:00:00 2001 From: ProcessEngine Bot Date: Fri, 4 Jul 2025 07:45:17 +0000 Subject: [PATCH 09/58] Release v1.9.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Changelog v1.9.0 (04.07.2025) Dieser Changelog deckt die Änderungen zwischen folgenden Versionen ab: [v1.8.2 und v1.9.0](https://github.com/5minds/ProcessCube.Deployment/compare/v1.8.2...v1.9.0). Weitere Hinweise befinden sich im Changelog der vorherigen Version: [v1.8.2](https://github.com/5minds/ProcessCube.Deployment/releases/tag/v1.8.2). 
## Merged Pull Requests - none [skip ci] From 20b2afe44a779899a2eaa88932ba1847dc0dda6b Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 7 Jul 2025 10:41:01 +0200 Subject: [PATCH 10/58] fix lowcode dashboard auth --- package.json | 2 +- sample/overlays/dev/authority/config.json | 29 ++++++++++++------- .../overlays/dev/lowcode/deployment-patch.yml | 16 ++++++++-- sample/overlays/dev/lowcode/ingress-patch.yml | 4 +-- 4 files changed, 34 insertions(+), 17 deletions(-) diff --git a/package.json b/package.json index 6329617..e2c5124 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,5 @@ { "name": "processcube_deployment", - "version": "1.9.0", + "version": "1.9.1", "description": "Deployment Repository of ProcessCube" } diff --git a/sample/overlays/dev/authority/config.json b/sample/overlays/dev/authority/config.json index 16aaf5b..232c9f6 100644 --- a/sample/overlays/dev/authority/config.json +++ b/sample/overlays/dev/authority/config.json @@ -25,17 +25,24 @@ "redirect_uris": ["https://app.sample.dev.5minds.cloud"] }, { - "clientId": "nodered", - "clientSecret": "79C29A79-607D-452B-B4CA-AF79BF0D44E9", - "scope": "openid email profile", - "grant_types": ["authorization_code", "refresh_token"], - "response_types": ["code"], - "redirect_uris": [ - "https://nodered.sampleapp.dev.5minds.cloud/auth/strategy/callback", - "https://nodered.sampleapp.dev.5minds.cloud/auth/dashboard/callback" - ], - "post_logout_redirect_uris": ["https://nodered.sampleapp.dev.5minds.cloud"], - "corsOrigins": ["https://nodered.sampleapp.dev.5minds.cloud"] + "clientId": "LowCodeEditorClient", + "clientSecret": "79C29A79-607D-452B-B4CA-AF79BF0D44E9", + "scope": "openid email profile nodered lanes engine_read engine_write", + "grant_types": ["authorization_code", "refresh_token"], + "response_types": ["code"], + "redirect_uris": ["https://lowcode.sampleapp.dev.5minds.cloud/auth/strategy/callback"], + "post_logout_redirect_uris": ["https://lowcode.sampleapp.dev.5minds.cloud/"], + 
"corsOrigins": ["https://lowcode.sampleapp.dev.5minds.cloud"] + }, + { + "clientId": "LowCodeDashboardClient", + "clientSecret": "05844EF2-08B9-473D-8FD1-9A96289F5304", + "scope": "openid email profile lanes engine_read engine_write engine_admin", + "grant_types": ["authorization_code", "refresh_token"], + "response_types": ["code"], + "redirect_uris": ["https://lowcode.sampleapp.dev.5minds.cloud/auth/dashboard/callback"], + "post_logout_redirect_uris": ["https://lowcode.sampleapp.dev.5minds.cloud/dashboard"], + "corsOrigins": ["https://lowcode.sampleapp.dev.5minds.cloud"] } ], "database": { diff --git a/sample/overlays/dev/lowcode/deployment-patch.yml b/sample/overlays/dev/lowcode/deployment-patch.yml index d8671ef..48a44c9 100644 --- a/sample/overlays/dev/lowcode/deployment-patch.yml +++ b/sample/overlays/dev/lowcode/deployment-patch.yml @@ -7,14 +7,24 @@ path: "/spec/template/spec/containers/0/env/-" #resource we want to change value: name: NODERED_BASE_URL - value: "https://nodered.sampleapp.dev.5minds.cloud" + value: "https://lowcode.sampleapp.dev.5minds.cloud" - op: add #action path: "/spec/template/spec/containers/0/env/-" #resource we want to change value: name: NODERED_CLIENT_ID - value: "nodered" + value: "LowCodeEditorClient" - op: add #action path: "/spec/template/spec/containers/0/env/-" #resource we want to change value: name: NODERED_CLIENT_SECRET - value: "79C29A79-607D-452B-B4CA-AF79BF0D44E9" \ No newline at end of file + value: "79C29A79-607D-452B-B4CA-AF79BF0D44E9" +- op: add #action + path: "/spec/template/spec/containers/0/env/-" #resource we want to change + value: + name: NODERED_DASHBOARD_CLIENT_ID + value: "LowCodeDashboardClient" +- op: add #action + path: "/spec/template/spec/containers/0/env/-" #resource we want to change + value: + name: NODERED_DASHBOARD_CLIENT_SECRET + value: "05844EF2-08B9-473D-8FD1-9A96289F5304" \ No newline at end of file diff --git a/sample/overlays/dev/lowcode/ingress-patch.yml 
b/sample/overlays/dev/lowcode/ingress-patch.yml index 2c79137..a70977c 100644 --- a/sample/overlays/dev/lowcode/ingress-patch.yml +++ b/sample/overlays/dev/lowcode/ingress-patch.yml @@ -1,6 +1,6 @@ - op: replace #action path: "/spec/rules/0/host" #resource we want to change - value: nodered.sampleapp.dev.5minds.cloud + value: lowcode.sampleapp.dev.5minds.cloud - op: replace #action path: "/spec/tls/0/hosts/0" #resource we want to change - value: nodered.sampleapp.dev.5minds.cloud \ No newline at end of file + value: lowcode.sampleapp.dev.5minds.cloud \ No newline at end of file From 8e26efae01dfbe1908d73f9a1d28b8e126365d63 Mon Sep 17 00:00:00 2001 From: ProcessEngine Bot Date: Mon, 7 Jul 2025 08:41:30 +0000 Subject: [PATCH 11/58] Release v1.9.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Changelog v1.9.1 (07.07.2025) Dieser Changelog deckt die Änderungen zwischen folgenden Versionen ab: [v1.9.0 und v1.9.1](https://github.com/5minds/ProcessCube.Deployment/compare/v1.9.0...v1.9.1). Weitere Hinweise befinden sich im Changelog der vorherigen Version: [v1.9.0](https://github.com/5minds/ProcessCube.Deployment/releases/tag/v1.9.0). 
## Merged Pull Requests - none [skip ci] From ab7862de3c75c11bdad93acfad881ec1b3cc3bdf Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 22 Dec 2025 16:24:15 +0100 Subject: [PATCH 12/58] add hetzner deplyoment --- .gitignore | 26 + hetzner-setup/ProcessCube.Cloud/.gitignore | 30 + hetzner-setup/ProcessCube.Cloud/README.md | 612 ++++++++++++++++++ .../ProcessCube.Cloud/ansible/ansible.cfg | 13 + .../ansible/inventory/hosts.tpl | 29 + .../ansible/requirements.txt | 1 + .../ansible/roles/argocd/tasks/main.yml | 51 ++ .../ansible/roles/external_secrets/README.md | 117 ++++ .../roles/external_secrets/defaults/main.yml | 11 + .../roles/external_secrets/tasks/main.yml | 103 +++ .../ansible/roles/k3s_addons/tasks/main.yml | 215 ++++++ .../ansible/roles/k3s_ccm/tasks/main.yml | 81 +++ .../ansible/roles/k3s_master/tasks/main.yml | 230 +++++++ .../ansible/roles/k3s_user/tasks/main.yml | 41 ++ .../ansible/roles/k3s_worker/tasks/main.yml | 120 ++++ .../ansible/roles/tailscale/tasks/main.yml | 54 ++ .../ProcessCube.Cloud/ansible/site.yml | 111 ++++ hetzner-setup/ProcessCube.Cloud/main.tf | 283 ++++++++ hetzner-setup/ProcessCube.Cloud/outputs.tf | 53 ++ .../terraform.tfvars.example | 29 + hetzner-setup/ProcessCube.Cloud/variables.tf | 92 +++ 21 files changed, 2302 insertions(+) create mode 100644 .gitignore create mode 100644 hetzner-setup/ProcessCube.Cloud/.gitignore create mode 100644 hetzner-setup/ProcessCube.Cloud/README.md create mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/ansible.cfg create mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl create mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/requirements.txt create mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/roles/argocd/tasks/main.yml create mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/roles/external_secrets/README.md create mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/roles/external_secrets/defaults/main.yml create mode 100644 
hetzner-setup/ProcessCube.Cloud/ansible/roles/external_secrets/tasks/main.yml create mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml create mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_ccm/tasks/main.yml create mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml create mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_user/tasks/main.yml create mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml create mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/roles/tailscale/tasks/main.yml create mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/site.yml create mode 100644 hetzner-setup/ProcessCube.Cloud/main.tf create mode 100644 hetzner-setup/ProcessCube.Cloud/outputs.tf create mode 100644 hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example create mode 100644 hetzner-setup/ProcessCube.Cloud/variables.tf diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b52e709 --- /dev/null +++ b/.gitignore @@ -0,0 +1,26 @@ +.next +.vscode +node_modules +**.DS_Store +bin/ +obj/ +db/ +database +.env.local +.idea/ +tsconfig.tsbuildinfo +.vs/ +.react-email +google_oauth2_client_secrets.env +dist +# Python development +.python-version +/test-results/ +/playwright-report/ +/blob-report/ +/playwright/.cache/ + + +.env +1password-credentials.json +hosts \ No newline at end of file diff --git a/hetzner-setup/ProcessCube.Cloud/.gitignore b/hetzner-setup/ProcessCube.Cloud/.gitignore new file mode 100644 index 0000000..94faf27 --- /dev/null +++ b/hetzner-setup/ProcessCube.Cloud/.gitignore @@ -0,0 +1,30 @@ +# Terraform files +*.tfstate +*.tfstate.* +.terraform/ +.terraform.lock.hcl +crash.log +crash.*.log +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Sensitive files +terraform.tfvars +*.tfvars +.terraformrc +terraform.rc + +# Kubeconfig +kubeconfig.yaml +*.kubeconfig + +# SSH keys +*.pem +*.key +id_rsa* + +# OS files 
+.DS_Store +Thumbs.db diff --git a/hetzner-setup/ProcessCube.Cloud/README.md b/hetzner-setup/ProcessCube.Cloud/README.md new file mode 100644 index 0000000..ff0f7b3 --- /dev/null +++ b/hetzner-setup/ProcessCube.Cloud/README.md @@ -0,0 +1,612 @@ +# ProcessCube K3s Cluster on Hetzner Cloud + +Terraform + Ansible configuration for deploying a production-ready K3s Kubernetes cluster on Hetzner Cloud. + +## Architecture + +- **1 Master Node**: K3s server with control plane +- **2 Worker Nodes**: K3s agents for workload execution (scalable) +- **Hetzner Cloud Controller Manager**: Native cloud integration for LoadBalancers and persistent volumes +- **Hetzner CSI Driver**: Dynamic volume provisioning +- **Nginx Ingress Controller**: DaemonSet configuration for high availability +- **cert-manager**: Automatic TLS certificate management with Let's Encrypt +- **Tailscale**: Secure mesh VPN for remote access to cluster nodes +- **Private Network**: Internal 10.0.0.0/16 network for cluster communication +- **Firewall**: Configured security rules for SSH, K8s API, HTTP/HTTPS + +## Prerequisites + +1. **Hetzner Cloud Account**: Sign up at https://www.hetzner.com/cloud +2. **Hetzner API Token**: Create one in the Hetzner Cloud Console under "Security" → "API Tokens" +3. **Terraform**: Install from https://www.terraform.io/downloads + ```bash + # macOS + brew install terraform + + # Linux + wget https://releases.hashicorp.com/terraform/1.6.0/terraform_1.6.0_linux_amd64.zip + unzip terraform_1.6.0_linux_amd64.zip + sudo mv terraform /usr/local/bin/ + ``` +4. **Ansible**: Install from https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html + ```bash + # macOS + brew install ansible + + # Linux (Ubuntu/Debian) + sudo apt update + sudo apt install ansible + + # Python pip (all platforms) + pip3 install ansible + # or use the requirements file: + cd ansible && pip3 install -r requirements.txt + ``` +5. 
**SSH Key**: Generate if you don't have one: + ```bash + ssh-keygen -t rsa -b 4096 -f ~/.ssh/id_rsa + ``` +6. **Tailscale Account** (optional): Sign up at https://tailscale.com for secure remote access + +## Configuration + +### 1. Create terraform.tfvars + +Create a `terraform.tfvars` file in this directory: + +```hcl +# Hetzner Cloud Configuration +hcloud_token = "YOUR_HETZNER_API_TOKEN" + +# Cluster Configuration +cluster_name = "processcube-k3s" +location = "fsn1" # Options: nbg1, fsn1, hel1 +server_type = "cx43" # Options: cx11, cx21, cx31, cx41, cx51 +worker_count = 2 + +# K3s Version +k3s_version = "v1.34.2+k3s1" + +# Hetzner Cloud Integrations +hcloud_csi_version = "v2.18.1" # CSI Driver for persistent volumes +hcloud_ccm_version = "v1.20.0" # Cloud Controller Manager + +# SSH Key Paths +ssh_public_key_path = "~/.ssh/id_rsa.pub" +ssh_private_key_path = "~/.ssh/id_rsa" + +# Let's Encrypt (for automatic HTTPS certificates) +letsencrypt_email = "your-email@example.com" + +# Tailscale (optional - for secure remote access) +tailscale_auth_key = "YOUR_TAILSCALE_AUTH_KEY" # See "Tailscale Setup" below +# tailscale_tags = "tag:k3s" # Optional: Uncomment to use tags +``` + +### 2. Tailscale Setup (Optional but Recommended) + +Tailscale provides secure remote access to your cluster nodes without exposing them to the public internet. + +**Create an Auth Key:** + +1. Go to https://login.tailscale.com/admin/settings/keys +2. Click **Generate auth key** +3. Configure the key: + - **Description**: `ProcessCube K3s Cluster` + - **Reusable**: ✅ Enable (allows multiple devices to use the same key) + - **Ephemeral**: ❌ Disable (nodes should persist in your network) + - **Pre-approved**: ✅ Enable (automatically approve devices) + - **Tags**: Add `tag:k3s` if you want to use ACL rules for this cluster +4. Click **Generate key** +5. Copy the key (starts with `tskey-auth-...`) +6. 
Add to your `terraform.tfvars`: + ```hcl + tailscale_auth_key = "tskey-auth-kXXXXXXXXXXXXXXXXXXXXXXXXX" + ``` + +**Benefits:** +- Secure SSH access from anywhere without VPN configuration +- Access cluster services via Tailscale IPs +- No need to expose SSH on public IPs +- Automatic encryption and authentication + +**Skip Tailscale:** If you don't want to use Tailscale, you can skip this step and access nodes via their public IPs. + +### 3. Server Types + +Choose your server type based on workload requirements: + +| Type | vCPUs | RAM | Price/month* | +|------|-------|-----|--------------| +| cx11 | 1 | 2GB | ~€4.15 | +| cx21 | 2 | 4GB | ~€6.40 | +| cx31 | 2 | 8GB | ~€12.40 | +| cx41 | 4 | 16GB | ~€23.40 | +| cx51 | 8 | 32GB | ~€44.40 | + +*Prices are approximate. Check current pricing at https://www.hetzner.com/cloud + +### 4. Locations + +- `nbg1` - Nuremberg, Germany +- `fsn1` - Falkenstein, Germany +- `hel1` - Helsinki, Finland + +## Deployment + +The deployment process uses Terraform to provision infrastructure and Ansible to configure K3s. + +### Initialize Terraform + +```bash +cd hetzner-setup/ProcessCube.Cloud +terraform init +``` + +### Plan Deployment + +```bash +terraform plan +``` + +### Deploy Cluster + +```bash +terraform apply +``` + +Type `yes` when prompted to confirm. + +**What happens during deployment:** +1. Terraform creates Hetzner Cloud resources (servers, network, firewall) +2. Terraform generates Ansible inventory with all configuration +3. Ansible installs Tailscale on all nodes (if configured) +4. Ansible installs and configures K3s master node +5. Ansible installs Hetzner Cloud Controller Manager (CCM) +6. Ansible joins worker nodes to the cluster +7. Ansible installs cluster addons: + - Hetzner CSI Driver (for persistent volumes) + - Nginx Ingress Controller (DaemonSet on all nodes) + - cert-manager (for automatic TLS certificates) + - Hetzner LoadBalancer (automatically created by CCM) +8.
Ansible verifies all nodes are ready + +Deployment takes approximately 10-15 minutes. + +## Manual Ansible Execution + +If you need to re-run Ansible without recreating infrastructure: + +```bash +cd ansible +ansible-playbook -i inventory/hosts site.yml +``` + +Check connectivity first: +```bash +ansible all -i inventory/hosts -m ping +``` + +## Access the Cluster + +### Get Cluster Information + +```bash +terraform output +``` + +### Download kubeconfig + +```bash +terraform output -raw kubeconfig_command | bash +``` + +Or manually: + +```bash +ssh root@$(terraform output -raw master_ip) 'cat /etc/rancher/k3s/k3s.yaml' | \ + sed "s/127.0.0.1/$(terraform output -raw master_ip)/g" > kubeconfig.yaml +``` + +### Use kubectl + +```bash +export KUBECONFIG=./kubeconfig.yaml +kubectl get nodes +kubectl get pods -A +``` + +Expected output: +``` +NAME STATUS ROLES AGE VERSION +processcube-k3s-master Ready control-plane,master 5m v1.34.2+k3s1 +processcube-k3s-worker-1 Ready <none> 4m v1.34.2+k3s1 +processcube-k3s-worker-2 Ready <none> 4m v1.34.2+k3s1 +``` + +### SSH to Nodes + +```bash +# Master node +ssh root@$(terraform output -raw master_ip) + +# Worker nodes +ssh root@<worker-ip> +``` + +## Cluster Features + +### What's Included + +- ✅ **K3s v1.34.2+k3s1** - Lightweight Kubernetes distribution +- ✅ **Hetzner Cloud Controller Manager** - Native cloud integration +- ✅ **Hetzner CSI Driver** - Dynamic persistent volume provisioning +- ✅ **Nginx Ingress Controller** - DaemonSet configuration for HA +- ✅ **cert-manager** - Automatic TLS certificates with Let's Encrypt +- ✅ **Hetzner LoadBalancer** - Automatically provisioned for Ingress +- ✅ **Tailscale VPN** - Secure mesh networking (optional) +- ✅ **Private networking** - 10.0.0.0/16 internal network +- ✅ **Firewall configuration** - UFW rules on all nodes +- ✅ **Helm 3** - Installed on master node +- ✅ **Idempotent Ansible playbooks** - Safe to re-run + +### What's Disabled + +- ❌ **Traefik ingress controller** - Using Nginx instead +- ❌
**ServiceLB** - Using Hetzner LoadBalancer via CCM +- ❌ **K3s cloud provider** - Using external cloud-provider via CCM + +## Ansible Structure + +``` +ansible/ +├── ansible.cfg # Ansible configuration +├── site.yml # Main playbook orchestration +├── requirements.txt # Python dependencies +├── inventory/ +│ ├── hosts.tpl # Terraform template for inventory +│ └── hosts # Generated inventory (by Terraform) +└── roles/ + ├── tailscale/ # Tailscale VPN installation + │ └── tasks/main.yml + ├── k3s_master/ # K3s master node setup + │ └── tasks/main.yml + ├── k3s_ccm/ # Hetzner Cloud Controller Manager + │ └── tasks/main.yml + ├── k3s_worker/ # K3s worker node setup + │ └── tasks/main.yml + └── k3s_addons/ # Cluster addons (CSI, Ingress, cert-manager) + └── tasks/main.yml +``` + +## Customizing Ansible Playbooks + +### Modify K3s Installation + +Edit [ansible/roles/k3s_master/tasks/main.yml](ansible/roles/k3s_master/tasks/main.yml) or [ansible/roles/k3s_worker/tasks/main.yml](ansible/roles/k3s_worker/tasks/main.yml) + +### Add Additional Software + +Create new Ansible roles: + +```bash +cd ansible/roles +ansible-galaxy init my_custom_role +``` + +Then add it to `site.yml`: + +```yaml +- name: Install custom software + hosts: all + roles: + - my_custom_role +``` + +## Working with the Cluster + +### Using Tailscale for Remote Access + +If you configured Tailscale, your nodes are accessible via their Tailscale IPs: + +```bash +# View Tailscale machines +tailscale status + +# SSH via Tailscale +ssh root@<tailscale-ip> + +# You can also set up Tailscale on your local machine to access the cluster +``` + +### LoadBalancer and Ingress + +The Hetzner LoadBalancer is automatically created and configured: + +```bash +# Get LoadBalancer IP +kubectl get svc ingress-nginx-controller -n ingress-nginx + +# The LoadBalancer distributes traffic to all nodes (port 80/443) +``` + +### Deploy Your Applications with Ingress + +Example Ingress with automatic TLS: + +```yaml +apiVersion:
networking.k8s.io/v1 +kind: Ingress +metadata: + name: my-app + annotations: + cert-manager.io/cluster-issuer: letsencrypt-production +spec: + ingressClassName: nginx + tls: + - hosts: + - myapp.example.com + secretName: myapp-tls + rules: + - host: myapp.example.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: my-app + port: + number: 80 +``` + +Apply it: +```bash +kubectl apply -f my-app-ingress.yaml +``` + +cert-manager will automatically request and configure TLS certificates from Let's Encrypt. + +## Monitoring + +### View Cluster Status + +```bash +kubectl get nodes +kubectl top nodes +kubectl get pods -A +``` + +### Check K3s Service + +```bash +# On master +ssh root@$(terraform output -raw master_ip) +systemctl status k3s +journalctl -u k3s -f + +# On workers +ssh root@<worker-ip> +systemctl status k3s-agent +journalctl -u k3s-agent -f +``` + +### Ansible Logs + +Check Ansible output during `terraform apply` for any errors. + +## Cost Estimation + +**Monthly costs** (approximate): +- 1x Master (cx21): €6.40 +- 2x Workers (cx21): €12.80 +- 1x Load Balancer (lb11): €5.39 +- **Total: ~€24.59/month** + +Additional costs: +- Traffic: 20TB included (€1.19/TB after) +- Volumes: €0.0476/GB/month (if used) + +## Scaling + +### Add More Workers + +Edit `terraform.tfvars`: + +```hcl +worker_count = 3 # or more +``` + +Then apply: + +```bash +terraform apply +``` + +Terraform will: +1. Create new worker servers +2. Update Ansible inventory +3. Run Ansible to join new workers to cluster + +### Upgrade Server Type + +Edit `terraform.tfvars`: + +```hcl +server_type = "cx31" # upgrade from cx21 +``` + +Note: This will recreate the servers. Back up your data first!
+ +## Backup & Disaster Recovery + +### Backup etcd + +```bash +ssh root@$(terraform output -raw master_ip) +k3s etcd-snapshot save +``` + +Snapshots are stored in `/var/lib/rancher/k3s/server/db/snapshots/` + +### Download Backups + +```bash +scp root@$(terraform output -raw master_ip):/var/lib/rancher/k3s/server/db/snapshots/* ./backups/ +``` + +### Restore from Snapshot + +```bash +k3s server \ + --cluster-reset \ + --cluster-reset-restore-path=/var/lib/rancher/k3s/server/db/snapshots/snapshot-name +``` + +## Troubleshooting + +### Terraform Issues + +**Problem**: `Error: Error creating server` +- Check Hetzner API token is valid +- Verify server type is available in chosen location +- Check account limits in Hetzner Console + +### Ansible Issues + +**Problem**: Ansible cannot connect to servers +```bash +# Test SSH connectivity +ssh root@<server-ip> + +# Check Ansible inventory +cat ansible/inventory/hosts + +# Test with Ansible ping +cd ansible +ansible all -i inventory/hosts -m ping +``` + +**Problem**: K3s installation fails +```bash +# SSH to the server and check logs +ssh root@<server-ip> +journalctl -xeu k3s +# or for workers: +journalctl -xeu k3s-agent +``` + +### Nodes Not Joining + +1. Check master node is running: + ```bash + ssh root@$(terraform output -raw master_ip) 'systemctl status k3s' + ``` + +2. Check worker logs: + ```bash + ssh root@<worker-ip> 'journalctl -u k3s-agent -f' + ``` + +3. Verify network connectivity: + ```bash + ssh root@<worker-ip> 'ping -c 3 10.0.1.2' + ``` + +4. Re-run Ansible: + ```bash + cd ansible + ansible-playbook -i inventory/hosts site.yml + ``` + +### Get K3s Token + +```bash +ssh root@$(terraform output -raw master_ip) 'cat /var/lib/rancher/k3s/server/node-token' +``` + +Or use terraform: + +```bash +terraform output -raw k3s_token +``` + +### Firewall Issues + +Check UFW status: + +```bash +ssh root@$(terraform output -raw master_ip) 'ufw status' +``` + +## Cleanup + +### Destroy Cluster + +```bash +terraform destroy +``` + +Type `yes` to confirm.
+ +This will delete: +- All servers +- Load balancer +- Network +- Firewall +- SSH keys +- Generated Ansible inventory + +**Warning**: This action is irreversible. Backup any important data first! + +### Clean Local Files + +```bash +rm -f ansible/inventory/hosts +rm -f kubeconfig.yaml +``` + +## Security Considerations + +1. **SSH Access**: Consider restricting SSH access to specific IPs in the firewall rules +2. **API Access**: The Kubernetes API is publicly accessible. Use RBAC and network policies. +3. **Secrets**: Never commit `terraform.tfvars` or `*.tfstate` files to git +4. **API Token**: Keep your Hetzner API token secure +5. **Updates**: Regularly update K3s version for security patches +6. **Ansible**: SSH private key is used by Ansible - ensure it's properly secured + +## Advanced Configuration + +### Custom K3s Flags + +Edit the Ansible role to add custom flags: + +```yaml +# In ansible/roles/k3s_master/tasks/main.yml +- name: Install K3s master + shell: | + INSTALL_K3S_VERSION="{{ k3s_version }}" /tmp/k3s_install.sh server \ + --cluster-init \ + --your-custom-flag \ + --another-flag=value +``` + +### Configure Node Labels + +Add to Ansible playbook: + +```yaml +- name: Label nodes + shell: kubectl label node {{ inventory_hostname }} custom-label=value +``` + +## Support + +- **Hetzner Cloud Docs**: https://docs.hetzner.com/cloud/ +- **K3s Documentation**: https://docs.k3s.io/ +- **Terraform Hetzner Provider**: https://registry.terraform.io/providers/hetznercloud/hcloud/ +- **Ansible Documentation**: https://docs.ansible.com/ + +## License + +This configuration is part of ProcessCube.UG project. 
diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/ansible.cfg b/hetzner-setup/ProcessCube.Cloud/ansible/ansible.cfg new file mode 100644 index 0000000..a6d9cb8 --- /dev/null +++ b/hetzner-setup/ProcessCube.Cloud/ansible/ansible.cfg @@ -0,0 +1,13 @@ +[defaults] +inventory = inventory/hosts +host_key_checking = False +retry_files_enabled = False +roles_path = roles +interpreter_python = auto_silent +forks = 1 +timeout = 60 + +[ssh_connection] +ssh_args = -o ControlMaster=auto -o ControlPersist=300s -o StrictHostKeyChecking=no -o ServerAliveInterval=30 -o ServerAliveCountMax=10 -o ConnectTimeout=30 +pipelining = True +control_path = /tmp/ansible-ssh-%%h-%%p-%%r diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl b/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl new file mode 100644 index 0000000..7f2ed36 --- /dev/null +++ b/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl @@ -0,0 +1,29 @@ +[k3s_master] +${master_public_ip} ansible_user=root ansible_ssh_private_key_file=${ssh_private_key_path} + +[k3s_workers] +%{ for ip in worker_public_ips ~} +${ip} ansible_user=root ansible_ssh_private_key_file=${ssh_private_key_path} +%{ endfor ~} + +[k3s_cluster:children] +k3s_master +k3s_workers + +[k3s_cluster:vars] +k3s_version=${k3s_version} +k3s_token=${k3s_token} +master_ip=${master_private_ip} +cluster_name=${cluster_name} +hcloud_token=${hcloud_token} +hcloud_csi_version=${hcloud_csi_version} +hcloud_ccm_version=${hcloud_ccm_version} +network_id=${network_id} +location=${location} +letsencrypt_email=${letsencrypt_email} +%{ if tailscale_auth_key != "" ~} +tailscale_auth_key=${tailscale_auth_key} +tailscale_tags=${tailscale_tags} +%{ endif ~} +onepassword_credentials_json=${onepassword_credentials_json} +ansible_python_interpreter=/usr/bin/python3 diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/requirements.txt b/hetzner-setup/ProcessCube.Cloud/ansible/requirements.txt new file mode 100644 index 0000000..415e63e --- 
/dev/null +++ b/hetzner-setup/ProcessCube.Cloud/ansible/requirements.txt @@ -0,0 +1 @@ +ansible>=2.15.0,<3.0.0 diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/argocd/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/argocd/tasks/main.yml new file mode 100644 index 0000000..23cdbfe --- /dev/null +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/argocd/tasks/main.yml @@ -0,0 +1,51 @@ +--- +- name: Create ArgoCD namespace + shell: kubectl create namespace argocd --dry-run=client -o yaml | kubectl apply -f - + changed_when: true + +- name: Install ArgoCD + shell: | + kubectl apply -n argocd -f https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml + register: argocd_install + changed_when: "'created' in argocd_install.stdout or 'configured' in argocd_install.stdout" + +- name: Wait for ArgoCD pods to be ready + shell: | + ready_count=$(kubectl get pods -n argocd --field-selector=status.phase=Running --no-headers | wc -l) + total_count=$(kubectl get pods -n argocd --no-headers | wc -l) + if [ "$ready_count" -eq "$total_count" ] && [ "$total_count" -gt 0 ]; then + exit 0 + else + exit 1 + fi + args: + executable: /bin/bash + register: argocd_status + until: argocd_status.rc == 0 + retries: 60 + delay: 10 + changed_when: false + +- name: Get ArgoCD initial admin password + shell: kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" | base64 -d + register: argocd_password + changed_when: false + +- name: Display ArgoCD installation info + debug: + msg: + - "ArgoCD installed successfully!" + - "" + - "Access ArgoCD UI via port-forward:" + - " kubectl port-forward svc/argocd-server -n argocd 8080:443" + - " Then open: https://localhost:8080" + - "" + - "Login credentials:" + - " Username: admin" + - " Password: {{ argocd_password.stdout }}" + - "" + - "IMPORTANT: Change the admin password after first login!" 
+ - "Using ArgoCD CLI:" + - " kubectl port-forward svc/argocd-server -n argocd 8080:443 &" + - " argocd login localhost:8080" + - " argocd account update-password" diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/external_secrets/README.md b/hetzner-setup/ProcessCube.Cloud/ansible/roles/external_secrets/README.md new file mode 100644 index 0000000..de5bc5f --- /dev/null +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/external_secrets/README.md @@ -0,0 +1,117 @@ +# External Secrets Operator Role + +Diese Ansible-Rolle installiert den External Secrets Operator (Version 1.1.0) und konfiguriert die Integration mit 1Password Connect. + +## Voraussetzungen + +- Funktionierender K3s Cluster +- Helm installiert auf dem Master Node +- 1Password Connect Credentials JSON Datei + +## Konfiguration + +Die Konfiguration erfolgt über Terraform-Variablen, die automatisch an Ansible übergeben werden. + +### 1. 1Password Connect Credentials vorbereiten + +Lade die `1password-credentials.json` Datei von 1Password herunter und speichere sie lokal. + +### 2. Terraform-Variablen setzen + +Setze folgende Variablen in `terraform.tfvars`: + +```hcl +# 1Password Connect Configuration +onepassword_credentials_json = "/path/to/1password-credentials.json" +``` + +Diese Variable wird automatisch von Terraform an das Ansible-Inventory übergeben. + +**Wichtig:** Das `onepassword-connect-token` Secret wird NICHT global installiert, sondern muss pro Applikations-Namespace erstellt werden (siehe "Nach der Installation"). + +### 3. 
Optional: Versionen überschreiben + +Falls notwendig, kannst du die Versionen in `roles/external_secrets/defaults/main.yml` anpassen: + +```yaml +external_secrets_version: "1.1.0" +onepassword_connect_version: "2.0.5" +``` + +## Verwendung + +Die Rolle wird automatisch beim Ausführen von `site.yml` installiert: + +```bash +ansible-playbook -i inventory/hosts site.yml +``` + +Um nur die External Secrets Operator Installation auszuführen (setzt voraus, dass die Rolle in `site.yml` mit dem Tag `external_secrets` versehen ist): + +```bash +ansible-playbook -i inventory/hosts site.yml --tags external_secrets +``` + +## Was wird installiert? + +1. **External Secrets Operator** (v1.1.0) + - Installiert via Helm Chart + - Namespace: `external-secrets` + +2. **1Password Connect** + - Installiert via Helm Chart (v2.0.5, App v1.8.1) + - Konfiguriert mit den bereitgestellten Credentials + - Verbindet sich mit dem External Secrets Operator + +## Nach der Installation + +### 1. onepassword-connect-token Secret pro Namespace erstellen + +Für jede Applikation/Namespace muss ein eigenes Token-Secret erstellt werden: + +```bash +kubectl create secret generic onepassword-connect-token \ + -n <namespace> \ + --from-literal=token='<token>' +``` + +### 2. 
SecretStore erstellen + +Nach dem Erstellen des Token-Secrets kannst du einen SecretStore erstellen: + +```yaml +apiVersion: external-secrets.io/v1 +kind: SecretStore +metadata: + name: processcube-ug +spec: + provider: + onepassword: + connectHost: http://onepassword-connect.external-secrets.svc.cluster.local:8080 + vaults: + "ProcessCube.UG": 1 + auth: + secretRef: + connectTokenSecretRef: + name: onepassword-connect-token + key: token +``` + +## Fehlerbehebung + +### Pods prüfen +```bash +kubectl get pods -n external-secrets +``` + +### Logs prüfen +```bash +kubectl logs -n external-secrets -l app.kubernetes.io/name=external-secrets +kubectl logs -n external-secrets -l app.kubernetes.io/name=connect +``` + +### SecretStore Status prüfen +```bash +kubectl get secretstore -A +kubectl describe secretstore processcube-ug -n <namespace> +``` diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/external_secrets/defaults/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/external_secrets/defaults/main.yml new file mode 100644 index 0000000..03be839 --- /dev/null +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/external_secrets/defaults/main.yml @@ -0,0 +1,11 @@ +--- +# External Secrets Operator version +external_secrets_version: "1.1.0" + +# 1Password Connect settings +onepassword_connect_namespace: "external-secrets" +onepassword_connect_version: "2.0.5" + +# 1Password credentials (set via Terraform variables in inventory) +# These values are automatically passed from Terraform to Ansible +# Configure them in terraform.tfvars diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/external_secrets/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/external_secrets/tasks/main.yml new file mode 100644 index 0000000..8e942d0 --- /dev/null +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/external_secrets/tasks/main.yml @@ -0,0 +1,103 @@ +--- +- name: Create external-secrets namespace + shell: kubectl create namespace {{ onepassword_connect_namespace 
}} --dry-run=client -o yaml | kubectl apply -f - + changed_when: true + +- name: Install External Secrets Operator + shell: | + helm repo add external-secrets https://charts.external-secrets.io || true + helm repo update + helm upgrade --install external-secrets \ + external-secrets/external-secrets \ + -n {{ onepassword_connect_namespace }} \ + --version {{ external_secrets_version }} \ + --create-namespace \ + --wait + register: eso_install + changed_when: "'has been upgraded' in eso_install.stdout or 'has been installed' in eso_install.stdout" + +- name: Wait for External Secrets Operator to be ready + shell: | + ready_count=$(kubectl get pods -n {{ onepassword_connect_namespace }} --field-selector=status.phase=Running --no-headers | grep external-secrets | wc -l) + if [ "$ready_count" -ge 1 ]; then + exit 0 + else + exit 1 + fi + args: + executable: /bin/bash + register: eso_status + until: eso_status.rc == 0 + retries: 30 + delay: 10 + changed_when: false + +- name: Resolve 1Password credentials path + set_fact: + onepassword_credentials_full_path: "{{ onepassword_credentials_json | realpath }}" + when: onepassword_credentials_json != "" + delegate_to: localhost + +- name: Copy 1Password credentials to remote host + copy: + content: "{{ lookup('file', onepassword_credentials_full_path) }}" + dest: /tmp/1password-credentials.json + mode: '0600' + when: onepassword_credentials_json != "" + +- name: Install 1Password Connect + shell: | + helm repo add 1password https://1password.github.io/connect-helm-charts || true + helm repo update + helm upgrade --install onepassword-connect \ + 1password/connect \ + -n {{ onepassword_connect_namespace }} \ + --version {{ onepassword_connect_version }} \ + --set-file connect.credentials="/tmp/1password-credentials.json" \ + --set operator.create=false \ + --wait + when: onepassword_credentials_json != "" + register: op_connect_install + changed_when: "'has been upgraded' in op_connect_install.stdout or 'has been installed' in 
op_connect_install.stdout" + +- name: Wait for 1Password Connect to be ready + shell: | + ready_count=$(kubectl get pods -n {{ onepassword_connect_namespace }} --field-selector=status.phase=Running --no-headers | grep onepassword-connect | wc -l) + if [ "$ready_count" -ge 1 ]; then + exit 0 + else + exit 1 + fi + args: + executable: /bin/bash + register: op_connect_status + until: op_connect_status.rc == 0 + retries: 30 + delay: 10 + changed_when: false + when: onepassword_credentials_json != "" + +- name: Remove temporary 1Password credentials file + file: + path: /tmp/1password-credentials.json + state: absent + when: onepassword_credentials_json != "" + +- name: Display External Secrets installation info + debug: + msg: + - "External Secrets Operator {{ external_secrets_version }} installed successfully!" + - "" + - "1Password Connect installed and configured" + - "" + - "Check status with:" + - " kubectl get pods -n {{ onepassword_connect_namespace }}" + - "" + - "Next steps:" + - "1. Create onepassword-connect-token secret in each application namespace:" + - " kubectl create secret generic onepassword-connect-token \\" + - " -n \\" + - " --from-literal=token=''" + - "" + - "2. Create a SecretStore in your namespace:" + - " See deployment/base/secretstore/secretstore.yaml" diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml new file mode 100644 index 0000000..b358bf2 --- /dev/null +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml @@ -0,0 +1,215 @@ +--- +- name: Wait for all nodes to be initialized by CCM + shell: | + for node in $(kubectl get nodes -o jsonpath='{.items[*].metadata.name}'); do + providerid=$(kubectl get node $node -o jsonpath='{.spec.providerID}') + if [[ -z "$providerid" || ! 
"$providerid" =~ ^hcloud:// ]]; then + exit 1 + fi + done + exit 0 + args: + executable: /bin/bash + register: providerid_check + until: providerid_check.rc == 0 + retries: 60 + delay: 5 + changed_when: false + +- name: Download Hetzner CSI Driver manifest + get_url: + url: https://raw.githubusercontent.com/hetznercloud/csi-driver/{{ hcloud_csi_version }}/deploy/kubernetes/hcloud-csi.yml + dest: /tmp/hcloud-csi.yml + mode: '0644' + +- name: Install Hetzner CSI Driver + shell: kubectl apply -f /tmp/hcloud-csi.yml + register: csi_install + changed_when: "'created' in csi_install.stdout or 'configured' in csi_install.stdout" + +- name: Wait for CSI Driver to be ready + shell: kubectl get pods -n kube-system -l app.kubernetes.io/name=hcloud-csi -o jsonpath='{.items[*].status.phase}' + register: csi_status + until: "'Running' in csi_status.stdout" + retries: 30 + delay: 10 + changed_when: false + +- name: Label all nodes for LoadBalancer targeting + shell: kubectl label nodes --all loadbalancer-target=true --overwrite + changed_when: true + +- name: Download Nginx Ingress Controller manifest + get_url: + url: https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.14.0/deploy/static/provider/cloud/deploy.yaml + dest: /tmp/ingress-nginx-deploy.yaml + mode: '0644' + +- name: Patch Nginx Ingress Controller to use DaemonSet + shell: | + # Convert Deployment to DaemonSet + sed -i 's/kind: Deployment/kind: DaemonSet/' /tmp/ingress-nginx-deploy.yaml + # Remove replicas field (not valid for DaemonSet) + sed -i '/replicas:/d' /tmp/ingress-nginx-deploy.yaml + # Remove strategy field (not valid for DaemonSet) + sed -i '/strategy:/,/type:/d' /tmp/ingress-nginx-deploy.yaml + # Use host network for direct port binding + sed -i '/dnsPolicy: ClusterFirst/i\ hostNetwork: true' /tmp/ingress-nginx-deploy.yaml + args: + executable: /bin/bash + changed_when: true + +- name: Install Nginx Ingress Controller as DaemonSet + shell: kubectl apply -f 
/tmp/ingress-nginx-deploy.yaml + register: nginx_install + changed_when: "'created' in nginx_install.stdout or 'configured' in nginx_install.stdout" + +- name: Wait for Nginx Ingress Controller to be ready on all nodes + shell: | + node_count=$(kubectl get nodes --no-headers | wc -l) + ready_count=$(kubectl get pods -n ingress-nginx -l app.kubernetes.io/component=controller --field-selector=status.phase=Running --no-headers | wc -l) + if [ "$ready_count" -eq "$node_count" ]; then + exit 0 + else + exit 1 + fi + args: + executable: /bin/bash + register: nginx_status + until: nginx_status.rc == 0 + retries: 30 + delay: 10 + changed_when: false + +- name: Annotate Nginx Ingress Service for Hetzner LoadBalancer + shell: | + kubectl annotate service ingress-nginx-controller \ + -n ingress-nginx \ + load-balancer.hetzner.cloud/location="{{ location }}" \ + load-balancer.hetzner.cloud/use-private-ip="true" \ + load-balancer.hetzner.cloud/uses-proxyprotocol="false" \ + load-balancer.hetzner.cloud/name="{{ cluster_name }}-lb" \ + load-balancer.hetzner.cloud/node-selector="loadbalancer-target=true" \ + load-balancer.hetzner.cloud/health-check-interval="10s" \ + load-balancer.hetzner.cloud/health-check-timeout="5s" \ + load-balancer.hetzner.cloud/health-check-retries="3" \ + --overwrite + changed_when: true + +- name: Wait a moment for LoadBalancer to start provisioning + pause: + seconds: 15 + +- name: Wait for LoadBalancer to be provisioned + shell: kubectl get svc ingress-nginx-controller -n ingress-nginx -o jsonpath='{.status.loadBalancer.ingress[0].ip}' + register: lb_ip + until: lb_ip.stdout != "" + retries: 60 + delay: 10 + changed_when: false + +- name: Display LoadBalancer IP + debug: + msg: "LoadBalancer IP: {{ lb_ip.stdout }}" + +- name: Install cert-manager + shell: | + kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.3/cert-manager.yaml + register: certmanager_install + changed_when: "'created' in certmanager_install.stdout or 
'configured' in certmanager_install.stdout" + +- name: Wait for cert-manager to be ready + shell: kubectl get pods -n cert-manager -o jsonpath='{.items[*].status.phase}' + register: certmanager_status + until: certmanager_status.stdout.find('Running') != -1 and (certmanager_status.stdout | regex_findall('Running') | length) >= 3 + retries: 30 + delay: 10 + changed_when: false + +- name: Wait for cert-manager webhook to be ready + shell: kubectl get deployment -n cert-manager cert-manager-webhook -o jsonpath='{.status.availableReplicas}' + register: webhook_status + until: webhook_status.stdout | int > 0 + retries: 30 + delay: 10 + changed_when: false + +- name: Wait for cert-manager webhook endpoint to be ready + shell: kubectl get endpoints -n cert-manager cert-manager-webhook -o jsonpath='{.subsets[*].addresses[*].ip}' + register: webhook_endpoint + until: webhook_endpoint.stdout != "" + retries: 30 + delay: 10 + changed_when: false + +- name: Wait for webhook configuration to be registered + shell: kubectl get validatingwebhookconfigurations cert-manager-webhook + register: webhook_config + until: webhook_config.rc == 0 + retries: 30 + delay: 10 + changed_when: false + ignore_errors: true + +- name: Wait additional time for webhook to be fully operational + pause: + seconds: 60 + +- name: Create Let's Encrypt Staging ClusterIssuer + shell: | + kubectl apply -f - <&1 | tee /tmp/k3s-install.log + args: + creates: /usr/local/bin/k3s + when: not k3s_binary.stat.exists + register: k3s_install_output + async: 300 + poll: 10 + +- name: Display K3s installation output + debug: + msg: "{{ k3s_install_output.stdout_lines }}" + when: k3s_install_output is defined and k3s_install_output.stdout_lines is defined + +- name: Enable and start K3s agent service + systemd: + name: k3s-agent + enabled: yes + state: started + daemon_reload: yes + +- name: Display K3s worker installation info + debug: + msg: "K3s worker node joined the cluster successfully." 
diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/tailscale/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/tailscale/tasks/main.yml new file mode 100644 index 0000000..2b746c4 --- /dev/null +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/tailscale/tasks/main.yml @@ -0,0 +1,54 @@ +--- +- name: Install required packages for Tailscale + apt: + name: + - curl + - gnupg + state: present + update_cache: yes + +- name: Add Tailscale GPG key + shell: | + curl -fsSL https://pkgs.tailscale.com/stable/ubuntu/noble.noarmor.gpg | tee /usr/share/keyrings/tailscale-archive-keyring.gpg >/dev/null + args: + creates: /usr/share/keyrings/tailscale-archive-keyring.gpg + +- name: Add Tailscale repository + shell: | + echo "deb [signed-by=/usr/share/keyrings/tailscale-archive-keyring.gpg] https://pkgs.tailscale.com/stable/ubuntu noble main" | tee /etc/apt/sources.list.d/tailscale.list + args: + creates: /etc/apt/sources.list.d/tailscale.list + +- name: Update apt cache + apt: + update_cache: yes + +- name: Install Tailscale + apt: + name: tailscale + state: present + +- name: Check if Tailscale is already authenticated + shell: tailscale status --json | grep -q '"BackendState":"Running"' + register: tailscale_status + changed_when: false + failed_when: false + +- name: Authenticate Tailscale + shell: | + {% if tailscale_tags is defined and tailscale_tags != "" %} + tailscale up --authkey={{ tailscale_auth_key }} --accept-routes --advertise-tags={{ tailscale_tags }} + {% else %} + tailscale up --authkey={{ tailscale_auth_key }} --accept-routes + {% endif %} + when: tailscale_status.rc != 0 + register: tailscale_auth + +- name: Display Tailscale status + shell: tailscale status + register: tailscale_info + changed_when: false + +- name: Show Tailscale connection info + debug: + msg: "{{ tailscale_info.stdout_lines }}" diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/site.yml b/hetzner-setup/ProcessCube.Cloud/ansible/site.yml new file mode 100644 index 
0000000..176ca53 --- /dev/null +++ b/hetzner-setup/ProcessCube.Cloud/ansible/site.yml @@ -0,0 +1,111 @@ +--- +- name: Setup K3s Cluster + hosts: all + gather_facts: yes + become: yes + serial: 1 + tasks: + - name: Wait for system to be ready + wait_for_connection: + timeout: 300 + + - name: Update apt cache + apt: + update_cache: yes + cache_valid_time: 3600 + +- name: Install Tailscale on all nodes + hosts: k3s_cluster + gather_facts: yes + become: yes + roles: + - role: tailscale + when: tailscale_auth_key is defined and tailscale_auth_key != "" + +- name: Create K3s user and prepare system + hosts: k3s_cluster + gather_facts: yes + become: yes + roles: + - k3s_user + +- name: Configure K3s Master Node + hosts: k3s_master + gather_facts: yes + become: yes + roles: + - k3s_master + +- name: Install Hetzner Cloud Controller Manager + hosts: k3s_master + gather_facts: no + become: yes + roles: + - k3s_ccm + +- name: Retrieve K3s token from master + hosts: k3s_master + gather_facts: no + become: yes + tasks: + - name: Read K3s token + slurp: + src: /var/lib/rancher/k3s/server/node-token + register: k3s_master_token + + - name: Set token fact for workers + set_fact: + k3s_join_token: "{{ k3s_master_token.content | b64decode | trim }}" + delegate_to: "{{ item }}" + delegate_facts: true + with_items: "{{ groups['k3s_workers'] }}" + +- name: Configure K3s Worker Nodes + hosts: k3s_workers + gather_facts: yes + become: yes + roles: + - k3s_worker + +- name: Install K3s Addons (CSI, Ingress, cert-manager) + hosts: k3s_master + gather_facts: no + become: yes + roles: + - k3s_addons + +- name: Install External Secrets Operator with 1Password + hosts: k3s_master + gather_facts: no + become: yes + roles: + - external_secrets + +- name: Install ArgoCD + hosts: k3s_master + gather_facts: no + become: yes + roles: + - argocd + +- name: Verify Cluster + hosts: k3s_master + gather_facts: no + become: yes + tasks: + - name: Wait for all nodes to be ready + shell: kubectl get nodes 
--no-headers | grep -v NotReady | wc -l + register: ready_nodes + until: ready_nodes.stdout|int == groups['k3s_cluster']|length + retries: 30 + delay: 10 + changed_when: false + + - name: Display cluster status + shell: kubectl get nodes + register: cluster_status + changed_when: false + + - name: Show cluster nodes + debug: + var: cluster_status.stdout_lines diff --git a/hetzner-setup/ProcessCube.Cloud/main.tf b/hetzner-setup/ProcessCube.Cloud/main.tf new file mode 100644 index 0000000..f1d264b --- /dev/null +++ b/hetzner-setup/ProcessCube.Cloud/main.tf @@ -0,0 +1,283 @@ +terraform { + required_version = ">= 1.0" + required_providers { + hcloud = { + source = "hetznercloud/hcloud" + version = "~> 1.45" + } + random = { + source = "hashicorp/random" + version = "~> 3.5" + } + local = { + source = "hashicorp/local" + version = "~> 2.4" + } + null = { + source = "hashicorp/null" + version = "~> 3.2" + } + } +} + +provider "hcloud" { + token = var.hcloud_token +} + +# SSH Key for accessing the servers +resource "hcloud_ssh_key" "k3s" { + name = "${var.cluster_name}-key" + public_key = file(var.ssh_public_key_path) +} + +# Network for the cluster +resource "hcloud_network" "k3s" { + name = "${var.cluster_name}-network" + ip_range = "10.0.0.0/16" +} + +resource "hcloud_network_subnet" "k3s" { + network_id = hcloud_network.k3s.id + type = "cloud" + network_zone = "eu-central" + ip_range = "10.0.1.0/24" + + lifecycle { + create_before_destroy = false + } +} + +# Firewall rules for K3s nodes +resource "hcloud_firewall" "k3s" { + name = "${var.cluster_name}-firewall" + + # SSH access (temporary for installation) + rule { + direction = "in" + protocol = "tcp" + port = "22" + source_ips = [ + "0.0.0.0/0", + "::/0" + ] + } + + # Internal cluster communication + rule { + direction = "in" + protocol = "tcp" + port = "any" + source_ips = [ + "10.0.0.0/16" + ] + } + + rule { + direction = "in" + protocol = "udp" + port = "any" + source_ips = [ + "10.0.0.0/16" + ] + } + + # HTTP 
traffic for ingress + rule { + direction = "in" + protocol = "tcp" + port = "80" + source_ips = [ + "0.0.0.0/0", + "::/0" + ] + } + + # HTTPS traffic for ingress + rule { + direction = "in" + protocol = "tcp" + port = "443" + source_ips = [ + "0.0.0.0/0", + "::/0" + ] + } + + # Kubernetes API Server (only from internal network and bastion) + rule { + direction = "in" + protocol = "tcp" + port = "6443" + source_ips = [ + "10.0.0.0/16" + ] + } +} + +# K3s Master Node (Server) +resource "hcloud_server" "k3s_master" { + name = "${var.cluster_name}-master" + server_type = var.server_type + image = var.server_image + location = var.location + ssh_keys = [hcloud_ssh_key.k3s.id] + firewall_ids = [hcloud_firewall.k3s.id] + + network { + network_id = hcloud_network.k3s.id + ip = "10.0.1.2" + } + + public_net { + ipv4_enabled = true + ipv6_enabled = false + } + + labels = { + role = "master" + cluster = var.cluster_name + } + + depends_on = [ + hcloud_network_subnet.k3s + ] +} + +# K3s Worker Nodes +resource "hcloud_server" "k3s_worker" { + count = var.worker_count + name = "${var.cluster_name}-worker-${count.index + 1}" + server_type = var.server_type + image = var.server_image + location = var.location + ssh_keys = [hcloud_ssh_key.k3s.id] + firewall_ids = [hcloud_firewall.k3s.id] + + network { + network_id = hcloud_network.k3s.id + ip = "10.0.1.${count.index + 3}" + } + + public_net { + ipv4_enabled = true + ipv6_enabled = false + } + + labels = { + role = "worker" + cluster = var.cluster_name + } + + depends_on = [ + hcloud_network_subnet.k3s, + hcloud_server.k3s_master + ] +} + +# Generate random token for K3s cluster +resource "random_password" "k3s_token" { + length = 32 + special = false +} + +# Generate Ansible inventory +resource "local_file" "ansible_inventory" { + content = templatefile("${path.module}/ansible/inventory/hosts.tpl", { + master_private_ip = one([for net in hcloud_server.k3s_master.network : net.ip]) + master_public_ip = 
hcloud_server.k3s_master.ipv4_address + worker_private_ips = [for worker in hcloud_server.k3s_worker : one([for net in worker.network : net.ip])] + worker_public_ips = [for worker in hcloud_server.k3s_worker : worker.ipv4_address] + k3s_version = var.k3s_version + k3s_token = random_password.k3s_token.result + cluster_name = var.cluster_name + ssh_private_key_path = var.ssh_private_key_path + hcloud_token = var.hcloud_token + hcloud_csi_version = var.hcloud_csi_version + hcloud_ccm_version = var.hcloud_ccm_version + network_id = hcloud_network.k3s.id + location = var.location + letsencrypt_email = var.letsencrypt_email + tailscale_auth_key = var.tailscale_auth_key + tailscale_tags = var.tailscale_tags + onepassword_credentials_json = var.onepassword_credentials_json + }) + filename = "${path.module}/ansible/inventory/hosts" + + depends_on = [ + hcloud_server.k3s_master, + hcloud_server.k3s_worker + ] +} + +# Wait for servers to be ready +resource "null_resource" "wait_for_servers" { + provisioner "local-exec" { + command = "sleep 90" + } + + depends_on = [ + local_file.ansible_inventory + ] +} + +# Run Ansible playbook +resource "null_resource" "ansible_provisioning" { + provisioner "local-exec" { + command = "cd ${path.module}/ansible && ansible-playbook -i inventory/hosts site.yml" + } + + depends_on = [ + null_resource.wait_for_servers + ] + + triggers = { + master_id = hcloud_server.k3s_master.id + worker_ids = join(",", hcloud_server.k3s_worker[*].id) + inventory = local_file.ansible_inventory.content + } +} + +# Cleanup script for LoadBalancers before destroying network +# Uses Terraform's external data source to list and delete LoadBalancers via API +resource "null_resource" "cleanup_loadbalancers" { + triggers = { + network_id = hcloud_network.k3s.id + hcloud_token = var.hcloud_token + } + + provisioner "local-exec" { + when = destroy + command = <<-EOT + #!/bin/bash + set -e + + echo "Checking for LoadBalancers attached to network 
${self.triggers.network_id}..." + + # Use curl to query Hetzner API + LBS=$(curl -s -H "Authorization: Bearer ${self.triggers.hcloud_token}" \ + "https://api.hetzner.cloud/v1/load_balancers" | \ + jq -r --arg net_id "${self.triggers.network_id}" \ + '.load_balancers[] | select(.private_net[]?.network == ($net_id | tonumber)) | .id') + + if [ -z "$LBS" ]; then + echo "No LoadBalancers found attached to network." + exit 0 + fi + + for lb_id in $LBS; do + echo "Deleting LoadBalancer $lb_id..." + curl -s -X DELETE -H "Authorization: Bearer ${self.triggers.hcloud_token}" \ + "https://api.hetzner.cloud/v1/load_balancers/$lb_id" || true + done + + echo "Waiting for LoadBalancers to be fully deleted..." + sleep 15 + EOT + interpreter = ["bash", "-c"] + } + + depends_on = [ + null_resource.ansible_provisioning, + hcloud_network.k3s + ] +} diff --git a/hetzner-setup/ProcessCube.Cloud/outputs.tf b/hetzner-setup/ProcessCube.Cloud/outputs.tf new file mode 100644 index 0000000..47310c9 --- /dev/null +++ b/hetzner-setup/ProcessCube.Cloud/outputs.tf @@ -0,0 +1,53 @@ +output "master_public_ip" { + description = "Public IP address of the master node" + value = hcloud_server.k3s_master.ipv4_address +} + +output "master_private_ip" { + description = "Private IP address of the master node" + value = one([for net in hcloud_server.k3s_master.network : net.ip]) +} + +output "worker_public_ips" { + description = "Public IP addresses of worker nodes" + value = [for worker in hcloud_server.k3s_worker : worker.ipv4_address] +} + +output "worker_private_ips" { + description = "Private IP addresses of worker nodes" + value = [for worker in hcloud_server.k3s_worker : one([for net in worker.network : net.ip])] +} + +output "k3s_token" { + description = "K3s cluster token (sensitive)" + value = random_password.k3s_token.result + sensitive = true +} + +output "kubeconfig_command" { + description = "Command to get kubeconfig from master node" + value = "ssh -i ~/.ssh/id_ed25519_tes 
root@${hcloud_server.k3s_master.ipv4_address} 'cat /etc/rancher/k3s/k3s.yaml' > kubeconfig.yaml" +} + +output "network_id" { + description = "ID of the private network" + value = hcloud_network.k3s.id +} + +output "network_name" { + description = "Name of the private network" + value = hcloud_network.k3s.name +} + +output "load_balancer_info" { + description = "Information about LoadBalancer provisioning" + value = "LoadBalancer will be created automatically by Hetzner Cloud Controller Manager when Nginx Ingress Controller is deployed. Check with: kubectl get svc -n ingress-nginx ingress-nginx-controller" +} + +output "ssh_commands" { + description = "SSH commands to access nodes" + value = { + master = "ssh -i ~/.ssh/id_ed25519_tes root@${hcloud_server.k3s_master.ipv4_address}" + workers = [for worker in hcloud_server.k3s_worker : "ssh -i ~/.ssh/id_ed25519_tes root@${worker.ipv4_address}"] + } +} diff --git a/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example b/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example new file mode 100644 index 0000000..bd501cb --- /dev/null +++ b/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example @@ -0,0 +1,29 @@ +# Hetzner Cloud Configuration +hcloud_token = "YOUR_HETZNER_API_TOKEN_HERE" + +# Cluster Configuration +cluster_name = "processcube-k3s" +location = "fsn1" # Options: nbg1, fsn1, hel1 +server_type = "cx43" # Options: cx11, cx21, cx31, cx41, cx51 +worker_count = 2 + +# K3s Version +k3s_version = "v1.28.5+k3s1" + +# Hetzer csi-Driver +hcloud_csi_version = "v2.18.1" + +# SSH Key Paths +ssh_public_key_path = "~/.ssh/id_rsa.pub" +ssh_private_key_path = "~/.ssh/id_rsa" + +# Let's encrypt +letsencrypt_email = "info@processcube.io" + +# Tailscale Configuration +tailscale_auth_key = "YOUR_TAILSCALE_AUTH_KEY_HERE" +# tailscale_tags = "tag:k3s" # Optional: Uncomment to use tags + +# 1Password Connect Configuration for External Secrets Operator +onepassword_credentials_json = "/path/to/1password-credentials.json" +# 
Note: onepassword-connect-token must be created per application namespace diff --git a/hetzner-setup/ProcessCube.Cloud/variables.tf b/hetzner-setup/ProcessCube.Cloud/variables.tf new file mode 100644 index 0000000..b51b125 --- /dev/null +++ b/hetzner-setup/ProcessCube.Cloud/variables.tf @@ -0,0 +1,92 @@ +variable "hcloud_token" { + description = "Hetzner Cloud API Token" + type = string + sensitive = true +} + +variable "cluster_name" { + description = "Name of the K3s cluster" + type = string + default = "processcube-k3s" +} + +variable "location" { + description = "Hetzner Cloud location" + type = string + default = "fsn1" + # Available: nbg1 (Nuremberg), fsn1 (Falkenstein), hel1 (Helsinki) +} + +variable "server_type" { + description = "Hetzner Cloud server type" + type = string + default = "cx11" # 2 vCPU, 4GB RAM + # Options: cx11, cx21, cx31, cx41, cx51 + # cpx11, cpx21, cpx31, cpx41, cpx51 (AMD) +} + +variable "server_image" { + description = "Server image to use" + type = string + default = "ubuntu-22.04" +} + +variable "worker_count" { + description = "Number of worker nodes" + type = number + default = 2 +} + +variable "k3s_version" { + description = "K3s version to install" + type = string + default = "v1.28.5+k3s1" +} + +variable "hcloud_csi_version" { + description = "Hetzner Cloud CSI Driver version" + type = string + default = "v2.18.1" +} + +variable "hcloud_ccm_version" { + description = "Hetzner Cloud Controller Manager version" + type = string + default = "v1.20.0" +} + +variable "ssh_public_key_path" { + description = "Path to SSH public key file" + type = string + default = "~/.ssh/id_rsa.pub" +} + +variable "ssh_private_key_path" { + description = "Path to SSH private key file for Ansible" + type = string + default = "~/.ssh/id_rsa" +} + +variable "letsencrypt_email" { + description = "Email address for Let's Encrypt certificate notifications" + type = string +} + +variable "tailscale_auth_key" { + description = "Tailscale authentication key 
(optional - Tailscale will only be installed if this is set)" + type = string + sensitive = true + default = "" +} + +variable "tailscale_tags" { + description = "Tailscale tags to apply to nodes (optional)" + type = string + default = "" +} + +variable "onepassword_credentials_json" { + description = "Path to 1Password Connect credentials JSON file" + type = string + sensitive = true +} From 0b374c339eec9576b136285770896cd4f3f70214 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 26 Jan 2026 08:32:51 +0100 Subject: [PATCH 13/58] Make External Secrets Operator installation optional Only install the External Secrets Operator with 1Password Connect when onepassword_credentials_json variable is set. Also fix hardcoded SSH key paths in outputs.tf. --- hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl | 2 ++ hetzner-setup/ProcessCube.Cloud/ansible/site.yml | 3 ++- hetzner-setup/ProcessCube.Cloud/outputs.tf | 6 +++--- hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example | 5 +++-- hetzner-setup/ProcessCube.Cloud/variables.tf | 3 ++- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl b/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl index 7f2ed36..a4c555c 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl +++ b/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl @@ -25,5 +25,7 @@ letsencrypt_email=${letsencrypt_email} tailscale_auth_key=${tailscale_auth_key} tailscale_tags=${tailscale_tags} %{ endif ~} +%{ if onepassword_credentials_json != "" ~} onepassword_credentials_json=${onepassword_credentials_json} +%{ endif ~} ansible_python_interpreter=/usr/bin/python3 diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/site.yml b/hetzner-setup/ProcessCube.Cloud/ansible/site.yml index 176ca53..32c14b7 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/site.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/site.yml @@ -79,7 +79,8 @@ gather_facts: 
no become: yes roles: - - external_secrets + - role: external_secrets + when: onepassword_credentials_json is defined and onepassword_credentials_json != "" - name: Install ArgoCD hosts: k3s_master diff --git a/hetzner-setup/ProcessCube.Cloud/outputs.tf b/hetzner-setup/ProcessCube.Cloud/outputs.tf index 47310c9..b7ae626 100644 --- a/hetzner-setup/ProcessCube.Cloud/outputs.tf +++ b/hetzner-setup/ProcessCube.Cloud/outputs.tf @@ -26,7 +26,7 @@ output "k3s_token" { output "kubeconfig_command" { description = "Command to get kubeconfig from master node" - value = "ssh -i ~/.ssh/id_ed25519_tes root@${hcloud_server.k3s_master.ipv4_address} 'cat /etc/rancher/k3s/k3s.yaml' > kubeconfig.yaml" + value = "ssh root@${hcloud_server.k3s_master.ipv4_address} 'cat /etc/rancher/k3s/k3s.yaml' > kubeconfig.yaml" } output "network_id" { @@ -47,7 +47,7 @@ output "load_balancer_info" { output "ssh_commands" { description = "SSH commands to access nodes" value = { - master = "ssh -i ~/.ssh/id_ed25519_tes root@${hcloud_server.k3s_master.ipv4_address}" - workers = [for worker in hcloud_server.k3s_worker : "ssh -i ~/.ssh/id_ed25519_tes root@${worker.ipv4_address}"] + master = "ssh root@${hcloud_server.k3s_master.ipv4_address}" + workers = [for worker in hcloud_server.k3s_worker : "ssh root@${worker.ipv4_address}"] } } diff --git a/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example b/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example index bd501cb..8cc0c47 100644 --- a/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example +++ b/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example @@ -24,6 +24,7 @@ letsencrypt_email = "info@processcube.io" tailscale_auth_key = "YOUR_TAILSCALE_AUTH_KEY_HERE" # tailscale_tags = "tag:k3s" # Optional: Uncomment to use tags -# 1Password Connect Configuration for External Secrets Operator -onepassword_credentials_json = "/path/to/1password-credentials.json" +# 1Password Connect Configuration for External Secrets Operator (Optional) +# 
onepassword_credentials_json = "/path/to/1password-credentials.json" +# Note: External Secrets Operator will only be installed if this is set # Note: onepassword-connect-token must be created per application namespace diff --git a/hetzner-setup/ProcessCube.Cloud/variables.tf b/hetzner-setup/ProcessCube.Cloud/variables.tf index b51b125..2a6bb02 100644 --- a/hetzner-setup/ProcessCube.Cloud/variables.tf +++ b/hetzner-setup/ProcessCube.Cloud/variables.tf @@ -86,7 +86,8 @@ variable "tailscale_tags" { } variable "onepassword_credentials_json" { - description = "Path to 1Password Connect credentials JSON file" + description = "Path to 1Password Connect credentials JSON file (optional - External Secrets Operator will only be installed if this is set)" type = string sensitive = true + default = "" } From 719a77abd92b6585a252ddf5b878d48033040764 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 26 Jan 2026 14:09:16 +0100 Subject: [PATCH 14/58] Add install_cuby role with ProcessCube marketplace secrets - Add processcube_api_key terraform variable - Create install_cuby ansible role that sets up: - processcube namespace - regcred ImagePull secret for marketplace.processcube.io - processcube-api-key secret with the API key --- .../ansible/inventory/hosts.tpl | 1 + .../ansible/roles/install_cuby/tasks/main.yml | 31 +++++++++++++++++++ .../ProcessCube.Cloud/ansible/site.yml | 7 +++++ hetzner-setup/ProcessCube.Cloud/main.tf | 1 + .../terraform.tfvars.example | 3 ++ hetzner-setup/ProcessCube.Cloud/variables.tf | 6 ++++ 6 files changed, 49 insertions(+) create mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl b/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl index a4c555c..748dd64 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl +++ b/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl @@ -28,4 +28,5 @@ 
tailscale_tags=${tailscale_tags} %{ if onepassword_credentials_json != "" ~} onepassword_credentials_json=${onepassword_credentials_json} %{ endif ~} +processcube_api_key=${processcube_api_key} ansible_python_interpreter=/usr/bin/python3 diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml new file mode 100644 index 0000000..f23d624 --- /dev/null +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml @@ -0,0 +1,31 @@ +--- +- name: Create processcube namespace + shell: kubectl create namespace processcube --dry-run=client -o yaml | kubectl apply -f - + changed_when: true + +- name: Create ProcessCube Marketplace ImagePull Secret + shell: | + kubectl create secret docker-registry regcred \ + --docker-server=https://marketplace.processcube.io \ + --docker-username=processcube \ + --docker-password="{{ processcube_api_key }}" \ + -n processcube \ + --dry-run=client -o yaml | kubectl apply -f - + changed_when: true + +- name: Create ProcessCube API Key Secret + shell: | + kubectl create secret generic processcube-api-key \ + --from-literal=api-key="{{ processcube_api_key }}" \ + -n processcube \ + --dry-run=client -o yaml | kubectl apply -f - + changed_when: true + +- name: Display ProcessCube setup info + debug: + msg: + - "ProcessCube secrets created in namespace 'processcube'" + - "" + - "Available secrets:" + - " - regcred (ImagePull Secret)" + - " - processcube-api-key (API Key Secret)" diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/site.yml b/hetzner-setup/ProcessCube.Cloud/ansible/site.yml index 32c14b7..c506001 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/site.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/site.yml @@ -89,6 +89,13 @@ roles: - argocd +- name: Install Cuby + hosts: k3s_master + gather_facts: no + become: yes + roles: + - install_cuby + - name: Verify Cluster hosts: k3s_master gather_facts: no diff 
--git a/hetzner-setup/ProcessCube.Cloud/main.tf b/hetzner-setup/ProcessCube.Cloud/main.tf index f1d264b..70f00e3 100644 --- a/hetzner-setup/ProcessCube.Cloud/main.tf +++ b/hetzner-setup/ProcessCube.Cloud/main.tf @@ -200,6 +200,7 @@ resource "local_file" "ansible_inventory" { tailscale_auth_key = var.tailscale_auth_key tailscale_tags = var.tailscale_tags onepassword_credentials_json = var.onepassword_credentials_json + processcube_api_key = var.processcube_api_key }) filename = "${path.module}/ansible/inventory/hosts" diff --git a/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example b/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example index 8cc0c47..77d0dc9 100644 --- a/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example +++ b/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example @@ -28,3 +28,6 @@ tailscale_auth_key = "YOUR_TAILSCALE_AUTH_KEY_HERE" # onepassword_credentials_json = "/path/to/1password-credentials.json" # Note: External Secrets Operator will only be installed if this is set # Note: onepassword-connect-token must be created per application namespace + +# ProcessCube Marketplace Configuration +processcube_api_key = "YOUR_PROCESSCUBE_API_KEY_HERE" diff --git a/hetzner-setup/ProcessCube.Cloud/variables.tf b/hetzner-setup/ProcessCube.Cloud/variables.tf index 2a6bb02..5f5b892 100644 --- a/hetzner-setup/ProcessCube.Cloud/variables.tf +++ b/hetzner-setup/ProcessCube.Cloud/variables.tf @@ -91,3 +91,9 @@ variable "onepassword_credentials_json" { sensitive = true default = "" } + +variable "processcube_api_key" { + description = "ProcessCube API key for marketplace.processcube.io image registry" + type = string + sensitive = true +} From e8d1a8d8cdbb28deb4f30c8aa328e90446fe509e Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 26 Jan 2026 14:24:16 +0100 Subject: [PATCH 15/58] Add Cuby Operator deployment to install_cuby role - Add cuby_domain terraform variable for ingress configuration - Add cuby-operator.yaml.j2 template with all K8s 
resources - Deploy ServiceAccount, RBAC, ConfigMap, PVC, Deployment, Service, Ingress - Wait for rollout to complete before finishing --- .../ansible/inventory/hosts.tpl | 1 + .../ansible/roles/install_cuby/tasks/main.yml | 28 ++- .../templates/cuby-operator.yaml.j2 | 232 ++++++++++++++++++ hetzner-setup/ProcessCube.Cloud/main.tf | 1 + .../terraform.tfvars.example | 1 + hetzner-setup/ProcessCube.Cloud/variables.tf | 5 + 6 files changed, 266 insertions(+), 2 deletions(-) create mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl b/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl index 748dd64..51ca22b 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl +++ b/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl @@ -29,4 +29,5 @@ tailscale_tags=${tailscale_tags} onepassword_credentials_json=${onepassword_credentials_json} %{ endif ~} processcube_api_key=${processcube_api_key} +cuby_domain=${cuby_domain} ansible_python_interpreter=/usr/bin/python3 diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml index f23d624..442a0c8 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml @@ -21,11 +21,35 @@ --dry-run=client -o yaml | kubectl apply -f - changed_when: true -- name: Display ProcessCube setup info +- name: Template Cuby Operator manifest + template: + src: cuby-operator.yaml.j2 + dest: /tmp/cuby-operator.yaml + mode: '0644' + +- name: Deploy Cuby Operator + shell: kubectl apply -f /tmp/cuby-operator.yaml + register: cuby_deploy + changed_when: "'created' in cuby_deploy.stdout or 'configured' in cuby_deploy.stdout" + +- name: Wait for Cuby Operator to be ready + shell: | + kubectl rollout 
status deployment/cuby-operator -n processcube --timeout=120s + register: cuby_status + changed_when: false + +- name: Remove temporary manifest + file: + path: /tmp/cuby-operator.yaml + state: absent + +- name: Display Cuby setup info debug: msg: - - "ProcessCube secrets created in namespace 'processcube'" + - "Cuby Operator deployed successfully in namespace 'processcube'" - "" - "Available secrets:" - " - regcred (ImagePull Secret)" - " - processcube-api-key (API Key Secret)" + - "" + - "Cuby is available at: https://{{ cuby_domain }}" diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 new file mode 100644 index 0000000..eaa4515 --- /dev/null +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 @@ -0,0 +1,232 @@ +# Cuby Operator Kubernetes Manifests +# Managed by Ansible + +--- +# ServiceAccount +apiVersion: v1 +kind: ServiceAccount +metadata: + name: cuby-operator + namespace: processcube + labels: + app.kubernetes.io/name: cuby + app.kubernetes.io/component: operator + +--- +# ClusterRole - permissions to manage ProcessCube resources +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cuby-operator + labels: + app.kubernetes.io/name: cuby + app.kubernetes.io/component: operator +rules: + # Manage Deployments, Services, ConfigMaps, Secrets + - apiGroups: [""] + resources: ["services", "configmaps", "secrets", "persistentvolumeclaims"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: ["apps"] + resources: ["deployments", "statefulsets"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Manage Ingress + - apiGroups: ["networking.k8s.io"] + resources: ["ingresses"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Read Pods and Logs + - apiGroups: [""] + 
resources: ["pods", "pods/log"] + verbs: ["get", "list", "watch"] + # Read Nodes (for status) + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "list"] + # Read Namespaces + - apiGroups: [""] + resources: ["namespaces"] + verbs: ["get", "list", "watch", "create"] + +--- +# ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cuby-operator + labels: + app.kubernetes.io/name: cuby + app.kubernetes.io/component: operator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cuby-operator +subjects: + - kind: ServiceAccount + name: cuby-operator + namespace: processcube + +--- +# ConfigMap for Cuby configuration +apiVersion: v1 +kind: ConfigMap +metadata: + name: cuby-config + namespace: processcube + labels: + app.kubernetes.io/name: cuby + app.kubernetes.io/component: operator +data: + # Configuration will be mounted to /data/config.json + config.json: | + { + "deploymentTarget": "cloud", + "setupComplete": false + } + +--- +# PersistentVolumeClaim for Cuby data +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: cuby-data + namespace: processcube + labels: + app.kubernetes.io/name: cuby + app.kubernetes.io/component: operator +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + storageClassName: hcloud-volumes + +--- +# Deployment +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cuby-operator + namespace: processcube + labels: + app.kubernetes.io/name: cuby + app.kubernetes.io/component: operator +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: cuby + app.kubernetes.io/component: operator + template: + metadata: + labels: + app.kubernetes.io/name: cuby + app.kubernetes.io/component: operator + spec: + serviceAccountName: cuby-operator + imagePullSecrets: + - name: regcred + containers: + - name: cuby + image: marketplace.processcube.io/5minds/cuby:latest + imagePullPolicy: Always + ports: + - name: http + 
containerPort: 3847 + protocol: TCP + env: + - name: NODE_ENV + value: "production" + - name: CUBY_MODE + value: "operator" + - name: CUBY_CONFIG_DIR + value: "/data" + - name: PORT + value: "3847" + - name: KUBERNETES_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + volumeMounts: + - name: data + mountPath: /data + - name: config + mountPath: /data/config.json + subPath: config.json + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + livenessProbe: + httpGet: + path: /api/health + port: http + initialDelaySeconds: 10 + periodSeconds: 30 + readinessProbe: + httpGet: + path: /api/health + port: http + initialDelaySeconds: 5 + periodSeconds: 10 + volumes: + - name: data + persistentVolumeClaim: + claimName: cuby-data + - name: config + configMap: + name: cuby-config + +--- +# Service +apiVersion: v1 +kind: Service +metadata: + name: cuby-operator + namespace: processcube + labels: + app.kubernetes.io/name: cuby + app.kubernetes.io/component: operator +spec: + type: ClusterIP + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP + selector: + app.kubernetes.io/name: cuby + app.kubernetes.io/component: operator + +--- +# Ingress +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: cuby-operator + namespace: processcube + labels: + app.kubernetes.io/name: cuby + app.kubernetes.io/component: operator + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + nginx.ingress.kubernetes.io/proxy-body-size: "50m" +spec: + ingressClassName: nginx + tls: + - hosts: + - {{ cuby_domain }} + secretName: cuby-tls + rules: + - host: {{ cuby_domain }} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: cuby-operator + port: + name: http diff --git a/hetzner-setup/ProcessCube.Cloud/main.tf b/hetzner-setup/ProcessCube.Cloud/main.tf index 70f00e3..afe3a84 100644 --- a/hetzner-setup/ProcessCube.Cloud/main.tf +++ b/hetzner-setup/ProcessCube.Cloud/main.tf @@ -201,6 
+201,7 @@ resource "local_file" "ansible_inventory" { tailscale_tags = var.tailscale_tags onepassword_credentials_json = var.onepassword_credentials_json processcube_api_key = var.processcube_api_key + cuby_domain = var.cuby_domain }) filename = "${path.module}/ansible/inventory/hosts" diff --git a/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example b/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example index 77d0dc9..c823522 100644 --- a/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example +++ b/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example @@ -31,3 +31,4 @@ tailscale_auth_key = "YOUR_TAILSCALE_AUTH_KEY_HERE" # ProcessCube Marketplace Configuration processcube_api_key = "YOUR_PROCESSCUBE_API_KEY_HERE" +cuby_domain = "cuby.example.com" diff --git a/hetzner-setup/ProcessCube.Cloud/variables.tf b/hetzner-setup/ProcessCube.Cloud/variables.tf index 5f5b892..3cb2632 100644 --- a/hetzner-setup/ProcessCube.Cloud/variables.tf +++ b/hetzner-setup/ProcessCube.Cloud/variables.tf @@ -97,3 +97,8 @@ variable "processcube_api_key" { type = string sensitive = true } + +variable "cuby_domain" { + description = "Domain for Cuby operator ingress (e.g. 
cuby.example.com)" + type = string +} From 6bff71edfb7df1eb57fb4678a3e55ac1f73175f4 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 26 Jan 2026 14:25:26 +0100 Subject: [PATCH 16/58] use cuby 0.6.0-develop.4 --- .../ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 index eaa4515..8e9dd92 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 @@ -127,7 +127,7 @@ spec: - name: regcred containers: - name: cuby - image: marketplace.processcube.io/5minds/cuby:latest + image: marketplace.processcube.io/5minds/cuby:0.6.0-develop.4 imagePullPolicy: Always ports: - name: http From cf196da1d9e50e159b09ce45c840e92b749e1826 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 26 Jan 2026 15:06:15 +0100 Subject: [PATCH 17/58] Remove Cuby ingress, use port-forward instead - Remove Ingress resource from cuby-operator template - Remove cuby_domain terraform variable - Update info output with port-forward command --- .../ansible/inventory/hosts.tpl | 1 - .../ansible/roles/install_cuby/tasks/main.yml | 4 ++- .../templates/cuby-operator.yaml.j2 | 31 ------------------- hetzner-setup/ProcessCube.Cloud/main.tf | 1 - .../terraform.tfvars.example | 1 - hetzner-setup/ProcessCube.Cloud/variables.tf | 5 --- 6 files changed, 3 insertions(+), 40 deletions(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl b/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl index 51ca22b..748dd64 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl +++ b/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl @@ -29,5 +29,4 @@ 
tailscale_tags=${tailscale_tags} onepassword_credentials_json=${onepassword_credentials_json} %{ endif ~} processcube_api_key=${processcube_api_key} -cuby_domain=${cuby_domain} ansible_python_interpreter=/usr/bin/python3 diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml index 442a0c8..1cab158 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml @@ -52,4 +52,6 @@ - " - regcred (ImagePull Secret)" - " - processcube-api-key (API Key Secret)" - "" - - "Cuby is available at: https://{{ cuby_domain }}" + - "Access Cuby via port-forward:" + - " kubectl port-forward svc/cuby-operator -n processcube 3847:80" + - " Then open: http://localhost:3847" diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 index 8e9dd92..c876395 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 @@ -199,34 +199,3 @@ spec: selector: app.kubernetes.io/name: cuby app.kubernetes.io/component: operator - ---- -# Ingress -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: cuby-operator - namespace: processcube - labels: - app.kubernetes.io/name: cuby - app.kubernetes.io/component: operator - annotations: - cert-manager.io/cluster-issuer: letsencrypt-prod - nginx.ingress.kubernetes.io/proxy-body-size: "50m" -spec: - ingressClassName: nginx - tls: - - hosts: - - {{ cuby_domain }} - secretName: cuby-tls - rules: - - host: {{ cuby_domain }} - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: cuby-operator - port: - name: http diff --git 
a/hetzner-setup/ProcessCube.Cloud/main.tf b/hetzner-setup/ProcessCube.Cloud/main.tf index afe3a84..70f00e3 100644 --- a/hetzner-setup/ProcessCube.Cloud/main.tf +++ b/hetzner-setup/ProcessCube.Cloud/main.tf @@ -201,7 +201,6 @@ resource "local_file" "ansible_inventory" { tailscale_tags = var.tailscale_tags onepassword_credentials_json = var.onepassword_credentials_json processcube_api_key = var.processcube_api_key - cuby_domain = var.cuby_domain }) filename = "${path.module}/ansible/inventory/hosts" diff --git a/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example b/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example index c823522..77d0dc9 100644 --- a/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example +++ b/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example @@ -31,4 +31,3 @@ tailscale_auth_key = "YOUR_TAILSCALE_AUTH_KEY_HERE" # ProcessCube Marketplace Configuration processcube_api_key = "YOUR_PROCESSCUBE_API_KEY_HERE" -cuby_domain = "cuby.example.com" diff --git a/hetzner-setup/ProcessCube.Cloud/variables.tf b/hetzner-setup/ProcessCube.Cloud/variables.tf index 3cb2632..5f5b892 100644 --- a/hetzner-setup/ProcessCube.Cloud/variables.tf +++ b/hetzner-setup/ProcessCube.Cloud/variables.tf @@ -97,8 +97,3 @@ variable "processcube_api_key" { type = string sensitive = true } - -variable "cuby_domain" { - description = "Domain for Cuby operator ingress (e.g. cuby.example.com)" - type = string -} From c7dfc5d14d8819fa2dbad4bebf372d7911acb5ae Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 26 Jan 2026 16:45:17 +0100 Subject: [PATCH 18/58] Fix Cuby K8s certificate issue with NODE_EXTRA_CA_CERTS Add NODE_EXTRA_CA_CERTS env var pointing to the Kubernetes service account CA certificate to trust the self-signed K3s API server cert. 
--- .../ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 index c876395..f1451cb 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 @@ -136,6 +136,8 @@ spec: env: - name: NODE_ENV value: "production" + - name: NODE_EXTRA_CA_CERTS + value: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" - name: CUBY_MODE value: "operator" - name: CUBY_CONFIG_DIR From a3d5c6e9ab39ad045e3454ce20adc65f5eb319fb Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 26 Jan 2026 22:38:06 +0100 Subject: [PATCH 19/58] Add dynamic Ingress for Cuby using LoadBalancer IP - Get LoadBalancer IP from ingress-nginx-controller service - Create Ingress with cuby..nip.io domain - Enable TLS with letsencrypt-prod cluster issuer --- .../ansible/roles/install_cuby/tasks/main.yml | 17 ++++++++-- .../templates/cuby-operator.yaml.j2 | 31 +++++++++++++++++++ 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml index 1cab158..0ec052a 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml @@ -21,6 +21,19 @@ --dry-run=client -o yaml | kubectl apply -f - changed_when: true +- name: Get LoadBalancer IP from ingress-nginx + shell: | + kubectl get svc -n ingress-nginx ingress-nginx-controller -o jsonpath='{.status.loadBalancer.ingress[0].ip}' + register: loadbalancer_ip_result + until: loadbalancer_ip_result.stdout != "" + retries: 30 + delay: 10 + 
changed_when: false + +- name: Set cuby_domain fact + set_fact: + cuby_domain: "cuby.{{ loadbalancer_ip_result.stdout }}.nip.io" + - name: Template Cuby Operator manifest template: src: cuby-operator.yaml.j2 @@ -52,6 +65,4 @@ - " - regcred (ImagePull Secret)" - " - processcube-api-key (API Key Secret)" - "" - - "Access Cuby via port-forward:" - - " kubectl port-forward svc/cuby-operator -n processcube 3847:80" - - " Then open: http://localhost:3847" + - "Cuby is available at: https://{{ cuby_domain }}" diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 index f1451cb..46bb54d 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 @@ -201,3 +201,34 @@ spec: selector: app.kubernetes.io/name: cuby app.kubernetes.io/component: operator + +--- +# Ingress +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: cuby-operator + namespace: processcube + labels: + app.kubernetes.io/name: cuby + app.kubernetes.io/component: operator + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + nginx.ingress.kubernetes.io/proxy-body-size: "50m" +spec: + ingressClassName: nginx + tls: + - hosts: + - {{ cuby_domain }} + secretName: cuby-tls + rules: + - host: {{ cuby_domain }} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: cuby-operator + port: + name: http From 284f60d376e9680003991cae9a27b36bbc33790a Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 26 Jan 2026 22:40:42 +0100 Subject: [PATCH 20/58] Add cuby_url terraform output - Save cuby_domain to local file during Ansible run - Add terraform output that reads the domain from file - Add generated files to .gitignore --- hetzner-setup/ProcessCube.Cloud/.gitignore | 4 ++++ 
.../ansible/roles/install_cuby/tasks/main.yml | 7 +++++++ hetzner-setup/ProcessCube.Cloud/outputs.tf | 5 +++++ 3 files changed, 16 insertions(+) diff --git a/hetzner-setup/ProcessCube.Cloud/.gitignore b/hetzner-setup/ProcessCube.Cloud/.gitignore index 94faf27..9742754 100644 --- a/hetzner-setup/ProcessCube.Cloud/.gitignore +++ b/hetzner-setup/ProcessCube.Cloud/.gitignore @@ -28,3 +28,7 @@ id_rsa* # OS files .DS_Store Thumbs.db + +# Generated files +cuby_domain.txt +ansible/inventory/hosts diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml index 0ec052a..f110675 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml @@ -34,6 +34,13 @@ set_fact: cuby_domain: "cuby.{{ loadbalancer_ip_result.stdout }}.nip.io" +- name: Save cuby_domain to local file + copy: + content: "{{ cuby_domain }}" + dest: "{{ playbook_dir }}/../cuby_domain.txt" + mode: '0644' + delegate_to: localhost + - name: Template Cuby Operator manifest template: src: cuby-operator.yaml.j2 diff --git a/hetzner-setup/ProcessCube.Cloud/outputs.tf b/hetzner-setup/ProcessCube.Cloud/outputs.tf index b7ae626..48a120e 100644 --- a/hetzner-setup/ProcessCube.Cloud/outputs.tf +++ b/hetzner-setup/ProcessCube.Cloud/outputs.tf @@ -51,3 +51,8 @@ output "ssh_commands" { workers = [for worker in hcloud_server.k3s_worker : "ssh root@${worker.ipv4_address}"] } } + +output "cuby_url" { + description = "URL to access Cuby operator" + value = fileexists("${path.module}/cuby_domain.txt") ? 
"https://${trimspace(file("${path.module}/cuby_domain.txt"))}" : "Run 'terraform apply' to deploy the cluster first" +} From 53adca38093cdf3ad00e77ba441650cf854dcc95 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 26 Jan 2026 23:07:32 +0100 Subject: [PATCH 21/58] fix Save cuby_domain to local file --- .../ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml index f110675..ca68a82 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml @@ -40,6 +40,7 @@ dest: "{{ playbook_dir }}/../cuby_domain.txt" mode: '0644' delegate_to: localhost + become: false - name: Template Cuby Operator manifest template: From 136a12b7df96c3384e1928e82ced39b78ef8fd0b Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 26 Jan 2026 23:41:58 +0100 Subject: [PATCH 22/58] Fix cluster-issuer name and become for local task - Change cluster-issuer to letsencrypt-production - Add become: false for local file copy task --- .../ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 index 46bb54d..e88daa5 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 @@ -213,7 +213,7 @@ metadata: app.kubernetes.io/name: cuby app.kubernetes.io/component: operator annotations: - cert-manager.io/cluster-issuer: letsencrypt-prod + cert-manager.io/cluster-issuer: letsencrypt-production 
nginx.ingress.kubernetes.io/proxy-body-size: "50m" spec: ingressClassName: nginx From 001cf37da5aeb8e032008a7d8a71025b7afec6f1 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 26 Jan 2026 23:54:07 +0100 Subject: [PATCH 23/58] bump cuby image to 0.6.0-develop.6 --- .../ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 index e88daa5..b68a948 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 @@ -127,7 +127,7 @@ spec: - name: regcred containers: - name: cuby - image: marketplace.processcube.io/5minds/cuby:0.6.0-develop.4 + image: marketplace.processcube.io/5minds/cuby:0.6.0-develop.6 imagePullPolicy: Always ports: - name: http From a5099becb5927c085cfdf7437a72799fa67978cc Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Tue, 3 Feb 2026 14:28:30 +0100 Subject: [PATCH 24/58] Fix ArgoCD installation annotation size limit error --- .../ProcessCube.Cloud/ansible/roles/argocd/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/argocd/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/argocd/tasks/main.yml index 23cdbfe..b0be09e 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/argocd/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/argocd/tasks/main.yml @@ -5,7 +5,7 @@ - name: Install ArgoCD shell: | - kubectl apply -n argocd -f https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml + kubectl apply --server-side -n argocd -f https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml register: argocd_install 
changed_when: "'created' in argocd_install.stdout or 'configured' in argocd_install.stdout" From df61edf3ea0ed76c5071b6c1fdfc06770d079300 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Wed, 4 Mar 2026 15:28:32 +0100 Subject: [PATCH 25/58] Use K3s stable channel instead of pinned version Replace hardcoded INSTALL_K3S_VERSION with INSTALL_K3S_CHANNEL=stable so K3s always installs the latest stable release. The old pinned version v1.28.5+k3s1 was no longer available for download, causing worker node setup to fail. --- hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl | 1 - .../ansible/roles/k3s_master/tasks/main.yml | 4 ++-- .../ansible/roles/k3s_worker/tasks/main.yml | 4 ++-- hetzner-setup/ProcessCube.Cloud/main.tf | 1 - hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example | 3 --- hetzner-setup/ProcessCube.Cloud/variables.tf | 6 ------ 6 files changed, 4 insertions(+), 15 deletions(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl b/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl index 748dd64..b9b4ec0 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl +++ b/hetzner-setup/ProcessCube.Cloud/ansible/inventory/hosts.tpl @@ -11,7 +11,6 @@ k3s_master k3s_workers [k3s_cluster:vars] -k3s_version=${k3s_version} k3s_token=${k3s_token} master_ip=${master_private_ip} cluster_name=${cluster_name} diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml index 39f61ea..f3bcab8 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml @@ -87,7 +87,7 @@ - name: Install K3s master with Tailscale shell: | - INSTALL_K3S_VERSION="{{ k3s_version }}" \ + INSTALL_K3S_CHANNEL=stable \ /tmp/k3s_install.sh server \ --cluster-init \ --node-ip {{ private_ip.stdout }} \ @@ -111,7 +111,7 @@ - name: Install K3s master 
without Tailscale shell: | - INSTALL_K3S_VERSION="{{ k3s_version }}" \ + INSTALL_K3S_CHANNEL=stable \ /tmp/k3s_install.sh server \ --cluster-init \ --node-ip {{ private_ip.stdout }} \ diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml index b33d9d8..2978166 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml @@ -84,14 +84,14 @@ debug: msg: - "Installing K3s worker with:" - - " Version: {{ k3s_version }}" + - " Channel: stable (latest)" - " Master IP: {{ master_ip }}" - " Node IP: {{ worker_private_ip.stdout }}" - " Node Name: {{ node_hostname.stdout }}" - name: Install K3s worker shell: | - INSTALL_K3S_VERSION="{{ k3s_version }}" \ + INSTALL_K3S_CHANNEL=stable \ K3S_URL=https://{{ master_ip }}:6443 \ K3S_TOKEN="{{ k3s_join_token }}" \ INSTALL_K3S_EXEC="--node-ip {{ worker_private_ip.stdout }} --flannel-iface=enp7s0 --kubelet-arg cloud-provider=external" \ diff --git a/hetzner-setup/ProcessCube.Cloud/main.tf b/hetzner-setup/ProcessCube.Cloud/main.tf index 70f00e3..f50ac01 100644 --- a/hetzner-setup/ProcessCube.Cloud/main.tf +++ b/hetzner-setup/ProcessCube.Cloud/main.tf @@ -187,7 +187,6 @@ resource "local_file" "ansible_inventory" { master_public_ip = hcloud_server.k3s_master.ipv4_address worker_private_ips = [for worker in hcloud_server.k3s_worker : one([for net in worker.network : net.ip])] worker_public_ips = [for worker in hcloud_server.k3s_worker : worker.ipv4_address] - k3s_version = var.k3s_version k3s_token = random_password.k3s_token.result cluster_name = var.cluster_name ssh_private_key_path = var.ssh_private_key_path diff --git a/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example b/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example index 77d0dc9..9e764be 100644 --- a/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example +++ 
b/hetzner-setup/ProcessCube.Cloud/terraform.tfvars.example @@ -7,9 +7,6 @@ location = "fsn1" # Options: nbg1, fsn1, hel1 server_type = "cx43" # Options: cx11, cx21, cx31, cx41, cx51 worker_count = 2 -# K3s Version -k3s_version = "v1.28.5+k3s1" - # Hetzer csi-Driver hcloud_csi_version = "v2.18.1" diff --git a/hetzner-setup/ProcessCube.Cloud/variables.tf b/hetzner-setup/ProcessCube.Cloud/variables.tf index 5f5b892..3fe04c2 100644 --- a/hetzner-setup/ProcessCube.Cloud/variables.tf +++ b/hetzner-setup/ProcessCube.Cloud/variables.tf @@ -37,12 +37,6 @@ variable "worker_count" { default = 2 } -variable "k3s_version" { - description = "K3s version to install" - type = string - default = "v1.28.5+k3s1" -} - variable "hcloud_csi_version" { description = "Hetzner Cloud CSI Driver version" type = string From bca9df4b8ebaa79fbde8bf2133637746a775c351 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Wed, 4 Mar 2026 15:39:59 +0100 Subject: [PATCH 26/58] Upgrade server image to Ubuntu 24.04 LTS Ubuntu 22.04 (jammy) repositories are no longer available on Hetzner mirrors, causing apt cache updates to fail on worker nodes. Also rename netcat package to netcat-openbsd for Ubuntu 24.04 compatibility. 
--- .../ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml | 2 +- .../ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml | 2 +- hetzner-setup/ProcessCube.Cloud/variables.tf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml index f3bcab8..92fadef 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml @@ -6,7 +6,7 @@ - wget - git - ufw - - netcat + - netcat-openbsd - jq state: present update_cache: yes diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml index 2978166..9e34aff 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml @@ -5,7 +5,7 @@ - curl - wget - ufw - - netcat + - netcat-openbsd state: present update_cache: yes diff --git a/hetzner-setup/ProcessCube.Cloud/variables.tf b/hetzner-setup/ProcessCube.Cloud/variables.tf index 3fe04c2..1c356ae 100644 --- a/hetzner-setup/ProcessCube.Cloud/variables.tf +++ b/hetzner-setup/ProcessCube.Cloud/variables.tf @@ -28,7 +28,7 @@ variable "server_type" { variable "server_image" { description = "Server image to use" type = string - default = "ubuntu-22.04" + default = "ubuntu-24.04" } variable "worker_count" { From e736e275ebcb8675f434d50490fb4b5aedb935ad Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Wed, 4 Mar 2026 15:57:25 +0100 Subject: [PATCH 27/58] Force apt to use IPv4 to fix hanging cache updates Hetzner mirrors over IPv6 are unreachable from the servers, causing apt update to hang indefinitely. Adding ForceIPv4 apt config before package installation resolves the connectivity issue. 
--- .../ansible/roles/k3s_master/tasks/main.yml | 5 +++++ .../ansible/roles/k3s_worker/tasks/main.yml | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml index 92fadef..3ad6184 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml @@ -1,4 +1,9 @@ --- +- name: Force apt to use IPv4 + copy: + content: 'Acquire::ForceIPv4 "true";' + dest: /etc/apt/apt.conf.d/99force-ipv4 + - name: Install required packages apt: name: diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml index 9e34aff..ea18aff 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml @@ -1,4 +1,9 @@ --- +- name: Force apt to use IPv4 + copy: + content: 'Acquire::ForceIPv4 "true";' + dest: /etc/apt/apt.conf.d/99force-ipv4 + - name: Install required packages apt: name: From 5da693add7f9f4102bfaee24b13a54d4ad3abeb1 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Wed, 4 Mar 2026 16:07:49 +0100 Subject: [PATCH 28/58] Prefer IPv4 system-wide via gai.conf on all nodes The apt ForceIPv4 fix only affects apt. Other tools like Python urllib (used by Ansible get_url) still try IPv6 first, causing downloads to fail with "Network is unreachable". Setting gai.conf precedence makes all applications prefer IPv4. 
--- .../ansible/roles/k3s_master/tasks/main.yml | 6 ++++++ .../ansible/roles/k3s_worker/tasks/main.yml | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml index 3ad6184..281848b 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml @@ -1,4 +1,10 @@ --- +- name: Prefer IPv4 over IPv6 system-wide + lineinfile: + path: /etc/gai.conf + line: "precedence ::ffff:0:0/96 100" + create: yes + - name: Force apt to use IPv4 copy: content: 'Acquire::ForceIPv4 "true";' diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml index ea18aff..4426781 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml @@ -1,4 +1,10 @@ --- +- name: Prefer IPv4 over IPv6 system-wide + lineinfile: + path: /etc/gai.conf + line: "precedence ::ffff:0:0/96 100" + create: yes + - name: Force apt to use IPv4 copy: content: 'Acquire::ForceIPv4 "true";' From ca264681fb1509363caae3ebf67d67681a9e470e Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Wed, 4 Mar 2026 16:19:44 +0100 Subject: [PATCH 29/58] Configure DNS resolvers and add download retries Worker nodes fail DNS resolution intermittently. Configure Hetzner and Cloudflare DNS resolvers via systemd-resolved. Also add retries to the K3s script download and verify installation before starting the k3s-agent service. 
--- .../ansible/roles/k3s_master/tasks/main.yml | 13 +++++++++ .../ansible/roles/k3s_worker/tasks/main.yml | 29 ++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml index 281848b..c4ee50a 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml @@ -5,6 +5,19 @@ line: "precedence ::ffff:0:0/96 100" create: yes +- name: Configure reliable DNS resolvers + copy: + content: | + [Resolve] + DNS=185.12.64.1 185.12.64.2 1.1.1.1 + FallbackDNS=8.8.8.8 8.8.4.4 + dest: /etc/systemd/resolved.conf + +- name: Restart systemd-resolved + systemd: + name: systemd-resolved + state: restarted + - name: Force apt to use IPv4 copy: content: 'Acquire::ForceIPv4 "true";' diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml index 4426781..199c026 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml @@ -5,6 +5,19 @@ line: "precedence ::ffff:0:0/96 100" create: yes +- name: Configure reliable DNS resolvers + copy: + content: | + [Resolve] + DNS=185.12.64.1 185.12.64.2 1.1.1.1 + FallbackDNS=8.8.8.8 8.8.4.4 + dest: /etc/systemd/resolved.conf + +- name: Restart systemd-resolved + systemd: + name: systemd-resolved + state: restarted + - name: Force apt to use IPv4 copy: content: 'Acquire::ForceIPv4 "true";' @@ -80,6 +93,10 @@ dest: /tmp/k3s_install.sh mode: '0700' when: not k3s_binary.stat.exists + retries: 5 + delay: 10 + register: k3s_script_download + until: k3s_script_download is success - name: Get private IP from private network interface shell: ip -4 addr show enp7s0 | grep -oP 
'(?<=inet\s)\d+(\.\d+){3}' @@ -106,7 +123,7 @@ K3S_URL=https://{{ master_ip }}:6443 \ K3S_TOKEN="{{ k3s_join_token }}" \ INSTALL_K3S_EXEC="--node-ip {{ worker_private_ip.stdout }} --flannel-iface=enp7s0 --kubelet-arg cloud-provider=external" \ - /tmp/k3s_install.sh 2>&1 | tee /tmp/k3s-install.log + /tmp/k3s_install.sh args: creates: /usr/local/bin/k3s when: not k3s_binary.stat.exists @@ -114,6 +131,16 @@ async: 300 poll: 10 +- name: Verify K3s was installed successfully + stat: + path: /usr/local/bin/k3s + register: k3s_binary_check + +- name: Fail if K3s installation failed + fail: + msg: "K3s installation failed. Check /tmp/k3s-install.log on the worker node." + when: not k3s_binary_check.stat.exists + - name: Display K3s installation output debug: msg: "{{ k3s_install_output.stdout_lines }}" From f032b736f3b4217a7959c31b16d17a787c490f9f Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Thu, 5 Mar 2026 08:09:16 +0100 Subject: [PATCH 30/58] Move network fixes before apt cache update in site.yml The IPv4/DNS fixes were in the roles which run AFTER the initial apt cache update in site.yml. This caused the first apt update to hang on IPv6. Now DNS resolvers, IPv4 preference, and apt IPv4 config are set before any apt operations. Also adds DNS diagnostics for debugging. 
--- .../ansible/roles/k3s_master/tasks/main.yml | 24 -------------- .../ansible/roles/k3s_worker/tasks/main.yml | 24 -------------- .../ProcessCube.Cloud/ansible/site.yml | 33 +++++++++++++++++++ 3 files changed, 33 insertions(+), 48 deletions(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml index c4ee50a..92fadef 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml @@ -1,28 +1,4 @@ --- -- name: Prefer IPv4 over IPv6 system-wide - lineinfile: - path: /etc/gai.conf - line: "precedence ::ffff:0:0/96 100" - create: yes - -- name: Configure reliable DNS resolvers - copy: - content: | - [Resolve] - DNS=185.12.64.1 185.12.64.2 1.1.1.1 - FallbackDNS=8.8.8.8 8.8.4.4 - dest: /etc/systemd/resolved.conf - -- name: Restart systemd-resolved - systemd: - name: systemd-resolved - state: restarted - -- name: Force apt to use IPv4 - copy: - content: 'Acquire::ForceIPv4 "true";' - dest: /etc/apt/apt.conf.d/99force-ipv4 - - name: Install required packages apt: name: diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml index 199c026..66991fc 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml @@ -1,28 +1,4 @@ --- -- name: Prefer IPv4 over IPv6 system-wide - lineinfile: - path: /etc/gai.conf - line: "precedence ::ffff:0:0/96 100" - create: yes - -- name: Configure reliable DNS resolvers - copy: - content: | - [Resolve] - DNS=185.12.64.1 185.12.64.2 1.1.1.1 - FallbackDNS=8.8.8.8 8.8.4.4 - dest: /etc/systemd/resolved.conf - -- name: Restart systemd-resolved - systemd: - name: systemd-resolved - state: restarted - -- name: Force apt to use IPv4 - copy: - 
content: 'Acquire::ForceIPv4 "true";' - dest: /etc/apt/apt.conf.d/99force-ipv4 - - name: Install required packages apt: name: diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/site.yml b/hetzner-setup/ProcessCube.Cloud/ansible/site.yml index c506001..31c6b66 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/site.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/site.yml @@ -9,6 +9,39 @@ wait_for_connection: timeout: 300 + - name: Prefer IPv4 over IPv6 system-wide + lineinfile: + path: /etc/gai.conf + line: "precedence ::ffff:0:0/96 100" + create: yes + + - name: Configure reliable DNS resolvers + copy: + content: | + [Resolve] + DNS=185.12.64.1 185.12.64.2 1.1.1.1 + FallbackDNS=8.8.8.8 8.8.4.4 + dest: /etc/systemd/resolved.conf + + - name: Restart systemd-resolved + systemd: + name: systemd-resolved + state: restarted + + - name: Force apt to use IPv4 + copy: + content: 'Acquire::ForceIPv4 "true";' + dest: /etc/apt/apt.conf.d/99force-ipv4 + + - name: Verify DNS resolution + shell: resolvectl status && dig +short google.com + register: dns_check + changed_when: false + + - name: Display DNS status + debug: + var: dns_check.stdout_lines + - name: Update apt cache apt: update_cache: yes From bda9bb87a8d0f4e5ad38caa243f84851331c439a Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Thu, 5 Mar 2026 08:35:56 +0100 Subject: [PATCH 31/58] Replace modprobe module with shell command The modprobe ansible module requires the community.general collection which is not installed. Use shell modprobe command instead. 
--- .../ProcessCube.Cloud/ansible/roles/k3s_user/tasks/main.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_user/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_user/tasks/main.yml index 48f7c25..f652022 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_user/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_user/tasks/main.yml @@ -12,12 +12,11 @@ - /var/log/k3s - name: Load required kernel modules - modprobe: - name: "{{ item }}" - state: present + shell: modprobe {{ item }} loop: - br_netfilter - overlay + changed_when: false - name: Ensure kernel modules are loaded on boot copy: From 8c0293e40f8684f90b85fe4596f894c6d4e8329b Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Thu, 5 Mar 2026 08:44:29 +0100 Subject: [PATCH 32/58] Replace community.general modules with shell commands The sysctl and ufw ansible modules require the community.general collection which is not installed. Replace with equivalent shell commands using the ufw and sysctl CLI tools. 
--- .../ansible/roles/k3s_master/tasks/main.yml | 55 ++++--------------- .../ansible/roles/k3s_user/tasks/main.yml | 19 ++++--- .../ansible/roles/k3s_worker/tasks/main.yml | 47 ++++------------ 3 files changed, 33 insertions(+), 88 deletions(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml index 92fadef..3ede516 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml @@ -17,49 +17,18 @@ sed -i '/ swap / s/^/#/' /etc/fstab changed_when: false -- name: Configure firewall rules for specific ports from anywhere - ufw: - rule: allow - port: "{{ item }}" - proto: tcp - loop: - - "22" - - "80" - - "443" - -- name: Allow K3s API from internal network only - ufw: - rule: allow - port: "6443" - proto: tcp - from_ip: 10.0.0.0/16 - -- name: Allow kubelet from internal network only - ufw: - rule: allow - port: "10250" - proto: tcp - from_ip: 10.0.0.0/16 - -- name: Allow internal cluster network traffic - ufw: - rule: allow - from_ip: 10.0.0.0/16 - -- name: Allow Pod network traffic - ufw: - rule: allow - from_ip: 10.42.0.0/16 - -- name: Allow Service network traffic - ufw: - rule: allow - from_ip: 10.43.0.0/16 - -- name: Enable firewall - ufw: - state: enabled - policy: deny +- name: Configure firewall rules + shell: | + ufw allow 22/tcp + ufw allow 80/tcp + ufw allow 443/tcp + ufw allow from 10.0.0.0/16 to any port 6443 proto tcp + ufw allow from 10.0.0.0/16 to any port 10250 proto tcp + ufw allow from 10.0.0.0/16 + ufw allow from 10.42.0.0/16 + ufw allow from 10.43.0.0/16 + ufw default deny incoming + ufw --force enable - name: Check if K3s is already installed stat: diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_user/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_user/tasks/main.yml index f652022..2194f88 100644 --- 
a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_user/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_user/tasks/main.yml @@ -29,12 +29,13 @@ mode: '0644' - name: Configure sysctl for k3s - sysctl: - name: "{{ item.key }}" - value: "{{ item.value }}" - state: present - reload: yes - loop: - - { key: 'net.ipv4.ip_forward', value: '1' } - - { key: 'net.bridge.bridge-nf-call-iptables', value: '1' } - - { key: 'net.bridge.bridge-nf-call-ip6tables', value: '1' } + copy: + content: | + net.ipv4.ip_forward = 1 + net.bridge.bridge-nf-call-iptables = 1 + net.bridge.bridge-nf-call-ip6tables = 1 + dest: /etc/sysctl.d/99-k3s.conf + +- name: Apply sysctl settings + shell: sysctl --system + changed_when: false diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml index 66991fc..365d064 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml @@ -15,42 +15,17 @@ sed -i '/ swap / s/^/#/' /etc/fstab changed_when: false -- name: Configure firewall rules for specific ports from anywhere - ufw: - rule: allow - port: "{{ item }}" - proto: tcp - loop: - - "22" - - "80" - - "443" - -- name: Allow kubelet from internal network only - ufw: - rule: allow - port: "10250" - proto: tcp - from_ip: 10.0.0.0/16 - -- name: Allow internal cluster network traffic - ufw: - rule: allow - from_ip: 10.0.0.0/16 - -- name: Allow Pod network traffic - ufw: - rule: allow - from_ip: 10.42.0.0/16 - -- name: Allow Service network traffic - ufw: - rule: allow - from_ip: 10.43.0.0/16 - -- name: Enable firewall - ufw: - state: enabled - policy: deny +- name: Configure firewall rules + shell: | + ufw allow 22/tcp + ufw allow 80/tcp + ufw allow 443/tcp + ufw allow from 10.0.0.0/16 to any port 10250 proto tcp + ufw allow from 10.0.0.0/16 + ufw allow from 10.42.0.0/16 + ufw allow from 
10.43.0.0/16 + ufw default deny incoming + ufw --force enable - name: Wait for master node to be ready wait_for: From 74e38fff6109aae25e07c6b8f732d454b324d77d Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Sat, 7 Mar 2026 10:47:17 +0100 Subject: [PATCH 33/58] Remove Cuby and ArgoCD from Hetzner deployment --- hetzner-setup/ProcessCube.Cloud/.gitignore | 1 - .../ansible/roles/argocd/tasks/main.yml | 51 ---- .../ansible/roles/install_cuby/tasks/main.yml | 76 ------ .../templates/cuby-operator.yaml.j2 | 234 ------------------ .../ProcessCube.Cloud/ansible/site.yml | 14 -- hetzner-setup/ProcessCube.Cloud/outputs.tf | 5 - 6 files changed, 381 deletions(-) delete mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/roles/argocd/tasks/main.yml delete mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml delete mode 100644 hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 diff --git a/hetzner-setup/ProcessCube.Cloud/.gitignore b/hetzner-setup/ProcessCube.Cloud/.gitignore index 9742754..ec24f27 100644 --- a/hetzner-setup/ProcessCube.Cloud/.gitignore +++ b/hetzner-setup/ProcessCube.Cloud/.gitignore @@ -30,5 +30,4 @@ id_rsa* Thumbs.db # Generated files -cuby_domain.txt ansible/inventory/hosts diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/argocd/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/argocd/tasks/main.yml deleted file mode 100644 index b0be09e..0000000 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/argocd/tasks/main.yml +++ /dev/null @@ -1,51 +0,0 @@ ---- -- name: Create ArgoCD namespace - shell: kubectl create namespace argocd --dry-run=client -o yaml | kubectl apply -f - - changed_when: true - -- name: Install ArgoCD - shell: | - kubectl apply --server-side -n argocd -f https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml - register: argocd_install - changed_when: "'created' in argocd_install.stdout or 'configured' in 
argocd_install.stdout" - -- name: Wait for ArgoCD pods to be ready - shell: | - ready_count=$(kubectl get pods -n argocd --field-selector=status.phase=Running --no-headers | wc -l) - total_count=$(kubectl get pods -n argocd --no-headers | wc -l) - if [ "$ready_count" -eq "$total_count" ] && [ "$total_count" -gt 0 ]; then - exit 0 - else - exit 1 - fi - args: - executable: /bin/bash - register: argocd_status - until: argocd_status.rc == 0 - retries: 60 - delay: 10 - changed_when: false - -- name: Get ArgoCD initial admin password - shell: kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" | base64 -d - register: argocd_password - changed_when: false - -- name: Display ArgoCD installation info - debug: - msg: - - "ArgoCD installed successfully!" - - "" - - "Access ArgoCD UI via port-forward:" - - " kubectl port-forward svc/argocd-server -n argocd 8080:443" - - " Then open: https://localhost:8080" - - "" - - "Login credentials:" - - " Username: admin" - - " Password: {{ argocd_password.stdout }}" - - "" - - "IMPORTANT: Change the admin password after first login!" 
- - "Using ArgoCD CLI:" - - " kubectl port-forward svc/argocd-server -n argocd 8080:443 &" - - " argocd login localhost:8080" - - " argocd account update-password" diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml deleted file mode 100644 index ca68a82..0000000 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/tasks/main.yml +++ /dev/null @@ -1,76 +0,0 @@ ---- -- name: Create processcube namespace - shell: kubectl create namespace processcube --dry-run=client -o yaml | kubectl apply -f - - changed_when: true - -- name: Create ProcessCube Marketplace ImagePull Secret - shell: | - kubectl create secret docker-registry regcred \ - --docker-server=https://marketplace.processcube.io \ - --docker-username=processcube \ - --docker-password="{{ processcube_api_key }}" \ - -n processcube \ - --dry-run=client -o yaml | kubectl apply -f - - changed_when: true - -- name: Create ProcessCube API Key Secret - shell: | - kubectl create secret generic processcube-api-key \ - --from-literal=api-key="{{ processcube_api_key }}" \ - -n processcube \ - --dry-run=client -o yaml | kubectl apply -f - - changed_when: true - -- name: Get LoadBalancer IP from ingress-nginx - shell: | - kubectl get svc -n ingress-nginx ingress-nginx-controller -o jsonpath='{.status.loadBalancer.ingress[0].ip}' - register: loadbalancer_ip_result - until: loadbalancer_ip_result.stdout != "" - retries: 30 - delay: 10 - changed_when: false - -- name: Set cuby_domain fact - set_fact: - cuby_domain: "cuby.{{ loadbalancer_ip_result.stdout }}.nip.io" - -- name: Save cuby_domain to local file - copy: - content: "{{ cuby_domain }}" - dest: "{{ playbook_dir }}/../cuby_domain.txt" - mode: '0644' - delegate_to: localhost - become: false - -- name: Template Cuby Operator manifest - template: - src: cuby-operator.yaml.j2 - dest: /tmp/cuby-operator.yaml - mode: '0644' - -- name: Deploy Cuby Operator 
- shell: kubectl apply -f /tmp/cuby-operator.yaml - register: cuby_deploy - changed_when: "'created' in cuby_deploy.stdout or 'configured' in cuby_deploy.stdout" - -- name: Wait for Cuby Operator to be ready - shell: | - kubectl rollout status deployment/cuby-operator -n processcube --timeout=120s - register: cuby_status - changed_when: false - -- name: Remove temporary manifest - file: - path: /tmp/cuby-operator.yaml - state: absent - -- name: Display Cuby setup info - debug: - msg: - - "Cuby Operator deployed successfully in namespace 'processcube'" - - "" - - "Available secrets:" - - " - regcred (ImagePull Secret)" - - " - processcube-api-key (API Key Secret)" - - "" - - "Cuby is available at: https://{{ cuby_domain }}" diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 b/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 deleted file mode 100644 index b68a948..0000000 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/install_cuby/templates/cuby-operator.yaml.j2 +++ /dev/null @@ -1,234 +0,0 @@ -# Cuby Operator Kubernetes Manifests -# Managed by Ansible - ---- -# ServiceAccount -apiVersion: v1 -kind: ServiceAccount -metadata: - name: cuby-operator - namespace: processcube - labels: - app.kubernetes.io/name: cuby - app.kubernetes.io/component: operator - ---- -# ClusterRole - permissions to manage ProcessCube resources -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: cuby-operator - labels: - app.kubernetes.io/name: cuby - app.kubernetes.io/component: operator -rules: - # Manage Deployments, Services, ConfigMaps, Secrets - - apiGroups: [""] - resources: ["services", "configmaps", "secrets", "persistentvolumeclaims"] - verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] - - apiGroups: ["apps"] - resources: ["deployments", "statefulsets"] - verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] - # Manage 
Ingress - - apiGroups: ["networking.k8s.io"] - resources: ["ingresses"] - verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] - # Read Pods and Logs - - apiGroups: [""] - resources: ["pods", "pods/log"] - verbs: ["get", "list", "watch"] - # Read Nodes (for status) - - apiGroups: [""] - resources: ["nodes"] - verbs: ["get", "list"] - # Read Namespaces - - apiGroups: [""] - resources: ["namespaces"] - verbs: ["get", "list", "watch", "create"] - ---- -# ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: cuby-operator - labels: - app.kubernetes.io/name: cuby - app.kubernetes.io/component: operator -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: cuby-operator -subjects: - - kind: ServiceAccount - name: cuby-operator - namespace: processcube - ---- -# ConfigMap for Cuby configuration -apiVersion: v1 -kind: ConfigMap -metadata: - name: cuby-config - namespace: processcube - labels: - app.kubernetes.io/name: cuby - app.kubernetes.io/component: operator -data: - # Configuration will be mounted to /data/config.json - config.json: | - { - "deploymentTarget": "cloud", - "setupComplete": false - } - ---- -# PersistentVolumeClaim for Cuby data -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: cuby-data - namespace: processcube - labels: - app.kubernetes.io/name: cuby - app.kubernetes.io/component: operator -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 1Gi - storageClassName: hcloud-volumes - ---- -# Deployment -apiVersion: apps/v1 -kind: Deployment -metadata: - name: cuby-operator - namespace: processcube - labels: - app.kubernetes.io/name: cuby - app.kubernetes.io/component: operator -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: cuby - app.kubernetes.io/component: operator - template: - metadata: - labels: - app.kubernetes.io/name: cuby - app.kubernetes.io/component: operator - spec: - serviceAccountName: 
cuby-operator - imagePullSecrets: - - name: regcred - containers: - - name: cuby - image: marketplace.processcube.io/5minds/cuby:0.6.0-develop.6 - imagePullPolicy: Always - ports: - - name: http - containerPort: 3847 - protocol: TCP - env: - - name: NODE_ENV - value: "production" - - name: NODE_EXTRA_CA_CERTS - value: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" - - name: CUBY_MODE - value: "operator" - - name: CUBY_CONFIG_DIR - value: "/data" - - name: PORT - value: "3847" - - name: KUBERNETES_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - volumeMounts: - - name: data - mountPath: /data - - name: config - mountPath: /data/config.json - subPath: config.json - resources: - requests: - cpu: 100m - memory: 256Mi - limits: - cpu: 500m - memory: 512Mi - livenessProbe: - httpGet: - path: /api/health - port: http - initialDelaySeconds: 10 - periodSeconds: 30 - readinessProbe: - httpGet: - path: /api/health - port: http - initialDelaySeconds: 5 - periodSeconds: 10 - volumes: - - name: data - persistentVolumeClaim: - claimName: cuby-data - - name: config - configMap: - name: cuby-config - ---- -# Service -apiVersion: v1 -kind: Service -metadata: - name: cuby-operator - namespace: processcube - labels: - app.kubernetes.io/name: cuby - app.kubernetes.io/component: operator -spec: - type: ClusterIP - ports: - - name: http - port: 80 - targetPort: http - protocol: TCP - selector: - app.kubernetes.io/name: cuby - app.kubernetes.io/component: operator - ---- -# Ingress -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: cuby-operator - namespace: processcube - labels: - app.kubernetes.io/name: cuby - app.kubernetes.io/component: operator - annotations: - cert-manager.io/cluster-issuer: letsencrypt-production - nginx.ingress.kubernetes.io/proxy-body-size: "50m" -spec: - ingressClassName: nginx - tls: - - hosts: - - {{ cuby_domain }} - secretName: cuby-tls - rules: - - host: {{ cuby_domain }} - http: - paths: - - path: / - pathType: 
Prefix - backend: - service: - name: cuby-operator - port: - name: http diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/site.yml b/hetzner-setup/ProcessCube.Cloud/ansible/site.yml index 31c6b66..b4bb105 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/site.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/site.yml @@ -115,20 +115,6 @@ - role: external_secrets when: onepassword_credentials_json is defined and onepassword_credentials_json != "" -- name: Install ArgoCD - hosts: k3s_master - gather_facts: no - become: yes - roles: - - argocd - -- name: Install Cuby - hosts: k3s_master - gather_facts: no - become: yes - roles: - - install_cuby - - name: Verify Cluster hosts: k3s_master gather_facts: no diff --git a/hetzner-setup/ProcessCube.Cloud/outputs.tf b/hetzner-setup/ProcessCube.Cloud/outputs.tf index 48a120e..b7ae626 100644 --- a/hetzner-setup/ProcessCube.Cloud/outputs.tf +++ b/hetzner-setup/ProcessCube.Cloud/outputs.tf @@ -51,8 +51,3 @@ output "ssh_commands" { workers = [for worker in hcloud_server.k3s_worker : "ssh root@${worker.ipv4_address}"] } } - -output "cuby_url" { - description = "URL to access Cuby operator" - value = fileexists("${path.module}/cuby_domain.txt") ? "https://${trimspace(file("${path.module}/cuby_domain.txt"))}" : "Run 'terraform apply' to deploy the cluster first" -} From 25deecc11f9365c288a38756fd7e21373985c147 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Thu, 16 Apr 2026 11:23:21 +0200 Subject: [PATCH 34/58] Replace Nginx Ingress with Traefik v3 and Gateway API Migrates routing from Ingress resources (nginx) to HTTPRoutes with a Gateway API setup using Traefik v3, including cert-manager upgrade to v1.20.2 and corresponding Ansible playbook updates for Hetzner. 
Co-Authored-By: Claude Sonnet 4.6 --- base/authority/httproute.yml | 18 +++ base/authority/ingress.yml | 22 ---- base/authority/kustomization.yaml | 2 +- base/engine/httproute.yml | 18 +++ base/engine/ingress.yml | 22 ---- base/engine/kustomization.yaml | 2 +- base/gateway.yml | 48 +++++++ base/http-redirect.yml | 14 ++ base/kustomization.yaml | 4 +- base/lowcode/httproute.yaml | 18 +++ base/lowcode/ingress.yaml | 22 ---- base/lowcode/kustomization.yaml | 2 +- .../ansible/roles/k3s_addons/tasks/main.yml | 123 ++++++++++++------ .../dev/authority/httproute-patch.yml | 3 + .../overlays/dev/authority/ingress-patch.yml | 6 - .../overlays/dev/engine/httproute-patch.yml | 3 + sample/overlays/dev/engine/ingress-patch.yml | 6 - sample/overlays/dev/gateway-patch.yml | 9 ++ sample/overlays/dev/kustomization.yaml | 23 ++-- .../overlays/dev/lowcode/httproute-patch.yml | 3 + sample/overlays/dev/lowcode/ingress-patch.yml | 6 - 21 files changed, 240 insertions(+), 134 deletions(-) create mode 100644 base/authority/httproute.yml delete mode 100644 base/authority/ingress.yml create mode 100644 base/engine/httproute.yml delete mode 100644 base/engine/ingress.yml create mode 100644 base/gateway.yml create mode 100644 base/http-redirect.yml create mode 100644 base/lowcode/httproute.yaml delete mode 100644 base/lowcode/ingress.yaml create mode 100644 sample/overlays/dev/authority/httproute-patch.yml delete mode 100644 sample/overlays/dev/authority/ingress-patch.yml create mode 100644 sample/overlays/dev/engine/httproute-patch.yml delete mode 100644 sample/overlays/dev/engine/ingress-patch.yml create mode 100644 sample/overlays/dev/gateway-patch.yml create mode 100644 sample/overlays/dev/lowcode/httproute-patch.yml delete mode 100644 sample/overlays/dev/lowcode/ingress-patch.yml diff --git a/base/authority/httproute.yml b/base/authority/httproute.yml new file mode 100644 index 0000000..d1d398b --- /dev/null +++ b/base/authority/httproute.yml @@ -0,0 +1,18 @@ +apiVersion: 
gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: authority +spec: + parentRefs: + - name: processcube-gateway + sectionName: websecure-authority + hostnames: + - authority.dev.5minds.cloud + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: authority + port: 11560 diff --git a/base/authority/ingress.yml b/base/authority/ingress.yml deleted file mode 100644 index 0f76e20..0000000 --- a/base/authority/ingress.yml +++ /dev/null @@ -1,22 +0,0 @@ -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: authority - annotations: - cert-manager.io/cluster-issuer: letsencrypt-production -spec: - ingressClassName: nginx - tls: - - hosts: - - authority.dev.5minds.cloud - secretName: authority-ingress-tls - rules: - - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: authority - port: - number: 11560 \ No newline at end of file diff --git a/base/authority/kustomization.yaml b/base/authority/kustomization.yaml index ce0da50..d229f00 100644 --- a/base/authority/kustomization.yaml +++ b/base/authority/kustomization.yaml @@ -2,7 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - - ingress.yml + - httproute.yml - service.yml - deployment.yml diff --git a/base/engine/httproute.yml b/base/engine/httproute.yml new file mode 100644 index 0000000..ce46fc9 --- /dev/null +++ b/base/engine/httproute.yml @@ -0,0 +1,18 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: engine +spec: + parentRefs: + - name: processcube-gateway + sectionName: websecure-engine + hostnames: + - engine.dev.5minds.cloud + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: engine + port: 10560 diff --git a/base/engine/ingress.yml b/base/engine/ingress.yml deleted file mode 100644 index da43dae..0000000 --- a/base/engine/ingress.yml +++ /dev/null @@ -1,22 +0,0 @@ -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: 
engine - annotations: - cert-manager.io/cluster-issuer: letsencrypt-production -spec: - ingressClassName: nginx - tls: - - hosts: - - engine.dev.5minds.cloud - secretName: engine-ingress-tls - rules: - - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: engine - port: - number: 10560 \ No newline at end of file diff --git a/base/engine/kustomization.yaml b/base/engine/kustomization.yaml index d324083..d9a5f31 100644 --- a/base/engine/kustomization.yaml +++ b/base/engine/kustomization.yaml @@ -2,6 +2,6 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - - ingress.yml + - httproute.yml - service.yml - deployment.yml \ No newline at end of file diff --git a/base/gateway.yml b/base/gateway.yml new file mode 100644 index 0000000..80fd286 --- /dev/null +++ b/base/gateway.yml @@ -0,0 +1,48 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: processcube-gateway + annotations: + cert-manager.io/cluster-issuer: letsencrypt-production +spec: + gatewayClassName: traefik + listeners: + - name: web + port: 80 + protocol: HTTP + allowedRoutes: + namespaces: + from: Same + - name: websecure-engine + port: 443 + protocol: HTTPS + hostname: engine.dev.5minds.cloud + tls: + mode: Terminate + certificateRefs: + - name: engine-tls + allowedRoutes: + namespaces: + from: Same + - name: websecure-authority + port: 443 + protocol: HTTPS + hostname: authority.dev.5minds.cloud + tls: + mode: Terminate + certificateRefs: + - name: authority-tls + allowedRoutes: + namespaces: + from: Same + - name: websecure-nodered + port: 443 + protocol: HTTPS + hostname: nodered.dev.5minds.cloud + tls: + mode: Terminate + certificateRefs: + - name: nodered-tls + allowedRoutes: + namespaces: + from: Same diff --git a/base/http-redirect.yml b/base/http-redirect.yml new file mode 100644 index 0000000..40581e0 --- /dev/null +++ b/base/http-redirect.yml @@ -0,0 +1,14 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute 
+metadata: + name: http-to-https-redirect +spec: + parentRefs: + - name: processcube-gateway + sectionName: web + rules: + - filters: + - type: RequestRedirect + requestRedirect: + scheme: https + statusCode: 301 diff --git a/base/kustomization.yaml b/base/kustomization.yaml index 55bf731..66d15d0 100644 --- a/base/kustomization.yaml +++ b/base/kustomization.yaml @@ -6,4 +6,6 @@ resources: - authority/ - engine/ - lowcode/ - - postgres/ \ No newline at end of file + - postgres/ + - gateway.yml + - http-redirect.yml \ No newline at end of file diff --git a/base/lowcode/httproute.yaml b/base/lowcode/httproute.yaml new file mode 100644 index 0000000..423a1ac --- /dev/null +++ b/base/lowcode/httproute.yaml @@ -0,0 +1,18 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: nodered +spec: + parentRefs: + - name: processcube-gateway + sectionName: websecure-nodered + hostnames: + - nodered.dev.5minds.cloud + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: nodered + port: 30000 diff --git a/base/lowcode/ingress.yaml b/base/lowcode/ingress.yaml deleted file mode 100644 index cdad3f4..0000000 --- a/base/lowcode/ingress.yaml +++ /dev/null @@ -1,22 +0,0 @@ -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nodered - annotations: - cert-manager.io/cluster-issuer: letsencrypt-production -spec: - ingressClassName: nginx - tls: - - hosts: - - nodered.dev.5minds.cloud - secretName: nodered-ingress-tls - rules: - - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: nodered - port: - number: 30000 diff --git a/base/lowcode/kustomization.yaml b/base/lowcode/kustomization.yaml index eb1b58a..71d6e0b 100644 --- a/base/lowcode/kustomization.yaml +++ b/base/lowcode/kustomization.yaml @@ -1,6 +1,6 @@ resources: - deployment.yaml -- ingress.yaml +- httproute.yaml - service.yaml - pvc.yaml diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml 
b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml index b358bf2..ea30e10 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml @@ -39,35 +39,77 @@ shell: kubectl label nodes --all loadbalancer-target=true --overwrite changed_when: true -- name: Download Nginx Ingress Controller manifest - get_url: - url: https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.14.0/deploy/static/provider/cloud/deploy.yaml - dest: /tmp/ingress-nginx-deploy.yaml +- name: Install Gateway API CRDs + shell: kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.2.0/standard-install.yaml + register: gateway_api_install + changed_when: "'created' in gateway_api_install.stdout or 'configured' in gateway_api_install.stdout" + +- name: Wait for Gateway API CRDs to be established + shell: kubectl wait --for=condition=Established crd/gateways.gateway.networking.k8s.io --timeout=60s + register: gateway_crd_wait + until: gateway_crd_wait.rc == 0 + retries: 12 + delay: 5 + changed_when: false + +- name: Add Traefik Helm repository + shell: helm repo add traefik https://traefik.github.io/charts && helm repo update + changed_when: false + +- name: Create traefik namespace + shell: kubectl create namespace traefik --dry-run=client -o yaml | kubectl apply -f - + changed_when: true + +- name: Write Traefik v3 Helm values + copy: + dest: /tmp/traefik-values.yaml mode: '0644' + content: | + deployment: + kind: DaemonSet -- name: Patch Nginx Ingress Controller to use DaemonSet + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + + ports: + web: + port: 80 + exposedPort: 80 + websecure: + port: 443 + exposedPort: 443 + + service: + type: LoadBalancer + + providers: + kubernetesGateway: + enabled: true + kubernetesIngress: + enabled: false + + gatewayClass: + enabled: true + name: traefik + + rbac: + enabled: true + +- name: 
Install Traefik v3 with Gateway API support shell: | - # Convert Deployment to DaemonSet - sed -i 's/kind: Deployment/kind: DaemonSet/' /tmp/ingress-nginx-deploy.yaml - # Remove replicas field (not valid for DaemonSet) - sed -i '/replicas:/d' /tmp/ingress-nginx-deploy.yaml - # Remove strategy field (not valid for DaemonSet) - sed -i '/strategy:/,/type:/d' /tmp/ingress-nginx-deploy.yaml - # Use host network for direct port binding - sed -i '/dnsPolicy: ClusterFirst/i\ hostNetwork: true' /tmp/ingress-nginx-deploy.yaml - args: - executable: /bin/bash + helm upgrade --install traefik traefik/traefik \ + --namespace traefik \ + --version 39.0.7 \ + --values /tmp/traefik-values.yaml \ + --wait \ + --timeout 5m + register: traefik_install changed_when: true -- name: Install Nginx Ingress Controller as DaemonSet - shell: kubectl apply -f /tmp/ingress-nginx-deploy.yaml - register: nginx_install - changed_when: "'created' in nginx_install.stdout or 'configured' in nginx_install.stdout" - -- name: Wait for Nginx Ingress Controller to be ready on all nodes +- name: Wait for Traefik DaemonSet to be ready on all nodes shell: | node_count=$(kubectl get nodes --no-headers | wc -l) - ready_count=$(kubectl get pods -n ingress-nginx -l app.kubernetes.io/component=controller --field-selector=status.phase=Running --no-headers | wc -l) + ready_count=$(kubectl get pods -n traefik -l app.kubernetes.io/name=traefik --field-selector=status.phase=Running --no-headers 2>/dev/null | wc -l) if [ "$ready_count" -eq "$node_count" ]; then exit 0 else @@ -75,16 +117,16 @@ fi args: executable: /bin/bash - register: nginx_status - until: nginx_status.rc == 0 + register: traefik_status + until: traefik_status.rc == 0 retries: 30 delay: 10 changed_when: false -- name: Annotate Nginx Ingress Service for Hetzner LoadBalancer +- name: Annotate Traefik Service for Hetzner LoadBalancer shell: | - kubectl annotate service ingress-nginx-controller \ - -n ingress-nginx \ + kubectl annotate service traefik \ + 
-n traefik \ load-balancer.hetzner.cloud/location="{{ location }}" \ load-balancer.hetzner.cloud/use-private-ip="true" \ load-balancer.hetzner.cloud/uses-proxyprotocol="false" \ @@ -101,7 +143,7 @@ seconds: 15 - name: Wait for LoadBalancer to be provisioned - shell: kubectl get svc ingress-nginx-controller -n ingress-nginx -o jsonpath='{.status.loadBalancer.ingress[0].ip}' + shell: kubectl get svc traefik -n traefik -o jsonpath='{.status.loadBalancer.ingress[0].ip}' register: lb_ip until: lb_ip.stdout != "" retries: 60 @@ -114,7 +156,7 @@ - name: Install cert-manager shell: | - kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.3/cert-manager.yaml + kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.20.2/cert-manager.yaml register: certmanager_install changed_when: "'created' in certmanager_install.stdout or 'configured' in certmanager_install.stdout" @@ -170,8 +212,11 @@ name: letsencrypt-staging-key solvers: - http01: - ingress: - class: nginx + gatewayHTTPRoute: + parentRefs: + - name: processcube-gateway + kind: Gateway + group: gateway.networking.k8s.io EOF changed_when: true @@ -190,8 +235,11 @@ name: letsencrypt-production-key solvers: - http01: - ingress: - class: nginx + gatewayHTTPRoute: + parentRefs: + - name: processcube-gateway + kind: Gateway + group: gateway.networking.k8s.io EOF changed_when: true @@ -199,17 +247,18 @@ debug: msg: - "Hetzner CSI Driver installed successfully" - - "Nginx Ingress Controller installed successfully" + - "Traefik v3 Gateway Controller installed successfully" + - "Gateway API CRDs (v1.2.0) installed successfully" - "LoadBalancer created automatically by CCM" - - "cert-manager installed successfully" + - "cert-manager v1.20.2 installed successfully" - "Let's Encrypt ClusterIssuers configured:" - " - letsencrypt-staging (for testing)" - " - letsencrypt-production (for production use)" - "" - "LoadBalancer IP: {{ lb_ip.stdout }}" - - "Ingress available at: 
http://{{ lb_ip.stdout }}" + - "Gateway available at: http://{{ lb_ip.stdout }}" - "" - - "To use TLS certificates, add these annotations to your Ingress:" + - "To use TLS certificates, annotate your Gateway resource with:" - " cert-manager.io/cluster-issuer: letsencrypt-staging" - " or" - " cert-manager.io/cluster-issuer: letsencrypt-production" diff --git a/sample/overlays/dev/authority/httproute-patch.yml b/sample/overlays/dev/authority/httproute-patch.yml new file mode 100644 index 0000000..2ff554a --- /dev/null +++ b/sample/overlays/dev/authority/httproute-patch.yml @@ -0,0 +1,3 @@ +- op: replace + path: /spec/hostnames/0 + value: authority.sampleapp.dev.5minds.cloud diff --git a/sample/overlays/dev/authority/ingress-patch.yml b/sample/overlays/dev/authority/ingress-patch.yml deleted file mode 100644 index 64d4b6c..0000000 --- a/sample/overlays/dev/authority/ingress-patch.yml +++ /dev/null @@ -1,6 +0,0 @@ -- op: replace #action - path: "/spec/rules/0/host" #resource we want to change - value: authority.sampleapp.dev.5minds.cloud -- op: replace #action - path: "/spec/tls/0/hosts/0" #resource we want to change - value: authority.sampleapp.dev.5minds.cloud \ No newline at end of file diff --git a/sample/overlays/dev/engine/httproute-patch.yml b/sample/overlays/dev/engine/httproute-patch.yml new file mode 100644 index 0000000..aa0ae60 --- /dev/null +++ b/sample/overlays/dev/engine/httproute-patch.yml @@ -0,0 +1,3 @@ +- op: replace + path: /spec/hostnames/0 + value: engine.sampleapp.dev.5minds.cloud diff --git a/sample/overlays/dev/engine/ingress-patch.yml b/sample/overlays/dev/engine/ingress-patch.yml deleted file mode 100644 index 6014a12..0000000 --- a/sample/overlays/dev/engine/ingress-patch.yml +++ /dev/null @@ -1,6 +0,0 @@ -- op: replace #action - path: "/spec/rules/0/host" #resource we want to change - value: engine.sampleapp.dev.5minds.cloud -- op: replace #action - path: "/spec/tls/0/hosts/0" #resource we want to change - value: 
engine.sampleapp.dev.5minds.cloud \ No newline at end of file diff --git a/sample/overlays/dev/gateway-patch.yml b/sample/overlays/dev/gateway-patch.yml new file mode 100644 index 0000000..4abeb04 --- /dev/null +++ b/sample/overlays/dev/gateway-patch.yml @@ -0,0 +1,9 @@ +- op: replace + path: /spec/listeners/1/hostname + value: engine.sampleapp.dev.5minds.cloud +- op: replace + path: /spec/listeners/2/hostname + value: authority.sampleapp.dev.5minds.cloud +- op: replace + path: /spec/listeners/3/hostname + value: lowcode.sampleapp.dev.5minds.cloud diff --git a/sample/overlays/dev/kustomization.yaml b/sample/overlays/dev/kustomization.yaml index 063ed7e..1bcbda5 100644 --- a/sample/overlays/dev/kustomization.yaml +++ b/sample/overlays/dev/kustomization.yaml @@ -8,30 +8,35 @@ resources: patches: - target: - group: networking.k8s.io - kind: Ingress + group: gateway.networking.k8s.io + kind: Gateway + name: processcube-gateway + path: ./gateway-patch.yml + - target: + group: gateway.networking.k8s.io + kind: HTTPRoute name: engine - path: ./engine/ingress-patch.yml + path: ./engine/httproute-patch.yml - target: group: apps kind: Deployment name: engine path: ./engine/deployment-patch.yml - target: - group: networking.k8s.io - kind: Ingress + group: gateway.networking.k8s.io + kind: HTTPRoute name: authority - path: ./authority/ingress-patch.yml + path: ./authority/httproute-patch.yml - target: group: apps kind: Deployment name: nodered path: ./lowcode/deployment-patch.yml - target: - group: networking.k8s.io - kind: Ingress + group: gateway.networking.k8s.io + kind: HTTPRoute name: nodered - path: ./lowcode/ingress-patch.yml + path: ./lowcode/httproute-patch.yml configMapGenerator: diff --git a/sample/overlays/dev/lowcode/httproute-patch.yml b/sample/overlays/dev/lowcode/httproute-patch.yml new file mode 100644 index 0000000..7886ed8 --- /dev/null +++ b/sample/overlays/dev/lowcode/httproute-patch.yml @@ -0,0 +1,3 @@ +- op: replace + path: /spec/hostnames/0 + value: 
lowcode.sampleapp.dev.5minds.cloud diff --git a/sample/overlays/dev/lowcode/ingress-patch.yml b/sample/overlays/dev/lowcode/ingress-patch.yml deleted file mode 100644 index a70977c..0000000 --- a/sample/overlays/dev/lowcode/ingress-patch.yml +++ /dev/null @@ -1,6 +0,0 @@ -- op: replace #action - path: "/spec/rules/0/host" #resource we want to change - value: lowcode.sampleapp.dev.5minds.cloud -- op: replace #action - path: "/spec/tls/0/hosts/0" #resource we want to change - value: lowcode.sampleapp.dev.5minds.cloud \ No newline at end of file From bd438c3050ec7cf1adc01ece85fe3c5b5186e696 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Thu, 16 Apr 2026 11:47:20 +0200 Subject: [PATCH 35/58] Fix Traefik Helm chart dnsPolicy schema error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move dnsPolicy under deployment section — top-level dnsPolicy is not allowed in traefik chart v39.x schema. Co-Authored-By: Claude Sonnet 4.6 --- .../ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml index ea30e10..e92b1dc 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml @@ -67,9 +67,9 @@ content: | deployment: kind: DaemonSet + dnsPolicy: ClusterFirstWithHostNet hostNetwork: true - dnsPolicy: ClusterFirstWithHostNet ports: web: From 83428ff24746373fb591c77ec93987266f065c89 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Thu, 16 Apr 2026 12:09:11 +0200 Subject: [PATCH 36/58] Fix Traefik port configuration for Helm chart v39 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use standard Traefik internal ports (8000/8443) instead of 80/443 — the chart template validates that port 
8000 is declared. Also remove hostNetwork since it's not needed with a Hetzner LoadBalancer service. Co-Authored-By: Claude Sonnet 4.6 --- .../ansible/roles/k3s_addons/tasks/main.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml index e92b1dc..0c6feee 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml @@ -67,16 +67,13 @@ content: | deployment: kind: DaemonSet - dnsPolicy: ClusterFirstWithHostNet - - hostNetwork: true ports: web: - port: 80 + port: 8000 exposedPort: 80 websecure: - port: 443 + port: 8443 exposedPort: 443 service: From 67e460cf11451e3cb3ca473c2c14e717a79897be Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Thu, 16 Apr 2026 12:22:00 +0200 Subject: [PATCH 37/58] Increase Traefik Helm install timeout to 10m Fresh cluster image pulls + DaemonSet rollout exceed the 5m limit. Co-Authored-By: Claude Sonnet 4.6 --- .../ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml index 0c6feee..a92584b 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml @@ -99,7 +99,7 @@ --version 39.0.7 \ --values /tmp/traefik-values.yaml \ --wait \ - --timeout 5m + --timeout 10m register: traefik_install changed_when: true From 6e7ed5c895d6b122e731d3490bc918ba126b9200 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Thu, 16 Apr 2026 12:28:26 +0200 Subject: [PATCH 38/58] Use K3s built-in Traefik with Gateway API via HelmChartConfig Remove --disable traefik from K3s install. 
Install Gateway API CRDs and write a HelmChartConfig in k3s_master so K3s manages Traefik lifecycle automatically. Remove manual helm install from k3s_addons and update all namespace references from traefik to kube-system. Co-Authored-By: Claude Sonnet 4.6 --- .../ansible/roles/k3s_addons/tasks/main.yml | 70 +------------------ .../ansible/roles/k3s_master/tasks/main.yml | 55 ++++++++++++++- 2 files changed, 56 insertions(+), 69 deletions(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml index a92584b..06418b3 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml @@ -39,74 +39,10 @@ shell: kubectl label nodes --all loadbalancer-target=true --overwrite changed_when: true -- name: Install Gateway API CRDs - shell: kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.2.0/standard-install.yaml - register: gateway_api_install - changed_when: "'created' in gateway_api_install.stdout or 'configured' in gateway_api_install.stdout" - -- name: Wait for Gateway API CRDs to be established - shell: kubectl wait --for=condition=Established crd/gateways.gateway.networking.k8s.io --timeout=60s - register: gateway_crd_wait - until: gateway_crd_wait.rc == 0 - retries: 12 - delay: 5 - changed_when: false - -- name: Add Traefik Helm repository - shell: helm repo add traefik https://traefik.github.io/charts && helm repo update - changed_when: false - -- name: Create traefik namespace - shell: kubectl create namespace traefik --dry-run=client -o yaml | kubectl apply -f - - changed_when: true - -- name: Write Traefik v3 Helm values - copy: - dest: /tmp/traefik-values.yaml - mode: '0644' - content: | - deployment: - kind: DaemonSet - - ports: - web: - port: 8000 - exposedPort: 80 - websecure: - port: 8443 - exposedPort: 443 - - service: - 
type: LoadBalancer - - providers: - kubernetesGateway: - enabled: true - kubernetesIngress: - enabled: false - - gatewayClass: - enabled: true - name: traefik - - rbac: - enabled: true - -- name: Install Traefik v3 with Gateway API support - shell: | - helm upgrade --install traefik traefik/traefik \ - --namespace traefik \ - --version 39.0.7 \ - --values /tmp/traefik-values.yaml \ - --wait \ - --timeout 10m - register: traefik_install - changed_when: true - - name: Wait for Traefik DaemonSet to be ready on all nodes shell: | node_count=$(kubectl get nodes --no-headers | wc -l) - ready_count=$(kubectl get pods -n traefik -l app.kubernetes.io/name=traefik --field-selector=status.phase=Running --no-headers 2>/dev/null | wc -l) + ready_count=$(kubectl get pods -n kube-system -l app.kubernetes.io/name=traefik --field-selector=status.phase=Running --no-headers 2>/dev/null | wc -l) if [ "$ready_count" -eq "$node_count" ]; then exit 0 else @@ -123,7 +59,7 @@ - name: Annotate Traefik Service for Hetzner LoadBalancer shell: | kubectl annotate service traefik \ - -n traefik \ + -n kube-system \ load-balancer.hetzner.cloud/location="{{ location }}" \ load-balancer.hetzner.cloud/use-private-ip="true" \ load-balancer.hetzner.cloud/uses-proxyprotocol="false" \ @@ -140,7 +76,7 @@ seconds: 15 - name: Wait for LoadBalancer to be provisioned - shell: kubectl get svc traefik -n traefik -o jsonpath='{.status.loadBalancer.ingress[0].ip}' + shell: kubectl get svc traefik -n kube-system -o jsonpath='{.status.loadBalancer.ingress[0].ip}' register: lb_ip until: lb_ip.stdout != "" retries: 60 diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml index 3ede516..27c83c3 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml @@ -66,7 +66,6 @@ --node-name {{ cluster_name }}-master \ 
--flannel-backend=vxlan \ --flannel-iface=enp7s0 \ - --disable traefik \ --disable servicelb \ --disable-cloud-controller \ --write-kubeconfig-mode 644 \ @@ -89,7 +88,6 @@ --node-name {{ cluster_name }}-master \ --flannel-backend=vxlan \ --flannel-iface=enp7s0 \ - --disable traefik \ --disable servicelb \ --disable-cloud-controller \ --write-kubeconfig-mode 644 \ @@ -154,6 +152,59 @@ delay: 10 changed_when: false +- name: Install Gateway API CRDs + shell: kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.2.0/standard-install.yaml + register: gateway_api_install + changed_when: "'created' in gateway_api_install.stdout or 'configured' in gateway_api_install.stdout" + +- name: Wait for Gateway API CRDs to be established + shell: kubectl wait --for=condition=Established crd/gateways.gateway.networking.k8s.io --timeout=60s + register: gateway_crd_wait + until: gateway_crd_wait.rc == 0 + retries: 12 + delay: 5 + changed_when: false + +- name: Configure Traefik with Gateway API support + copy: + dest: /var/lib/rancher/k3s/server/manifests/traefik-config.yaml + mode: '0644' + content: | + apiVersion: helm.cattle.io/v1 + kind: HelmChartConfig + metadata: + name: traefik + namespace: kube-system + spec: + valuesContent: |- + deployment: + kind: DaemonSet + + ports: + web: + port: 8000 + exposedPort: 80 + websecure: + port: 8443 + exposedPort: 443 + + service: + type: LoadBalancer + + providers: + kubernetesGateway: + enabled: true + kubernetesIngress: + enabled: false + + gatewayClass: + enabled: true + name: traefik + + rbac: + enabled: true + changed_when: true + - name: Get K3s token slurp: src: /var/lib/rancher/k3s/server/node-token From 52a7ee7e89d301343c2148c3b8b1f39cd18ebbf9 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Thu, 16 Apr 2026 12:40:41 +0200 Subject: [PATCH 39/58] Remove manual Gateway API CRD install from k3s_master Traefik's Helm chart installs the CRDs itself via the traefik-crd release. 
Installing them manually beforehand causes Helm ownership conflicts (missing app.kubernetes.io/managed-by label). Co-Authored-By: Claude Sonnet 4.6 --- .../ansible/roles/k3s_master/tasks/main.yml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml index 27c83c3..300801f 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml @@ -152,19 +152,6 @@ delay: 10 changed_when: false -- name: Install Gateway API CRDs - shell: kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.2.0/standard-install.yaml - register: gateway_api_install - changed_when: "'created' in gateway_api_install.stdout or 'configured' in gateway_api_install.stdout" - -- name: Wait for Gateway API CRDs to be established - shell: kubectl wait --for=condition=Established crd/gateways.gateway.networking.k8s.io --timeout=60s - register: gateway_crd_wait - until: gateway_crd_wait.rc == 0 - retries: 12 - delay: 5 - changed_when: false - - name: Configure Traefik with Gateway API support copy: dest: /var/lib/rancher/k3s/server/manifests/traefik-config.yaml From b777119713cb8f02e9d0c2442af6578b1e5ba3e7 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Thu, 16 Apr 2026 13:05:55 +0200 Subject: [PATCH 40/58] Configure Traefik Gateway for all namespaces and cert-manager - Set namespacePolicy: All on web/websecure listeners so HTTPRoutes from any namespace can attach to the Gateway - Annotate Gateway with cert-manager cluster-issuer for auto TLS - Fix ClusterIssuer solver to reference traefik Gateway in kube-system Co-Authored-By: Claude Sonnet 4.6 --- .../ansible/roles/k3s_addons/tasks/main.yml | 6 ++++-- .../ansible/roles/k3s_master/tasks/main.yml | 10 ++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git 
a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml index 06418b3..0f73d8d 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml @@ -147,7 +147,8 @@ - http01: gatewayHTTPRoute: parentRefs: - - name: processcube-gateway + - name: traefik + namespace: kube-system kind: Gateway group: gateway.networking.k8s.io EOF @@ -170,7 +171,8 @@ - http01: gatewayHTTPRoute: parentRefs: - - name: processcube-gateway + - name: traefik + namespace: kube-system kind: Gateway group: gateway.networking.k8s.io EOF diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml index 300801f..7cebb51 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml @@ -188,6 +188,16 @@ enabled: true name: traefik + gateway: + enabled: true + annotations: + cert-manager.io/cluster-issuer: letsencrypt-production + listeners: + web: + namespacePolicy: All + websecure: + namespacePolicy: All + rbac: enabled: true changed_when: true From 7ec691538783e9281ea43db9d89fa97644e9a785 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Thu, 16 Apr 2026 13:24:53 +0200 Subject: [PATCH 41/58] Add port to Traefik gateway listeners in HelmChartConfig The chart template requires port to be explicit when overriding listener config. Use entrypoint ports (8000/8443), not service ports (80/443). 
Co-Authored-By: Claude Sonnet 4.6 --- .../ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml index 7cebb51..f736446 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml @@ -194,8 +194,10 @@ cert-manager.io/cluster-issuer: letsencrypt-production listeners: web: + port: 8000 namespacePolicy: All websecure: + port: 8443 namespacePolicy: All rbac: From 602509332d463f90a6cb18357f2bd41f93fcdb66 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Thu, 16 Apr 2026 13:38:28 +0200 Subject: [PATCH 42/58] Fix Traefik gateway listener config in HelmChartConfig Use raw Gateway API structure instead of namespacePolicy (not a valid Traefik chart field). Add required protocol field to each listener. 
Co-Authored-By: Claude Sonnet 4.6 --- .../ansible/roles/k3s_master/tasks/main.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml index f736446..62ee6ae 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml @@ -195,10 +195,19 @@ listeners: web: port: 8000 - namespacePolicy: All + protocol: HTTP + allowedRoutes: + namespaces: + from: All websecure: port: 8443 - namespacePolicy: All + protocol: HTTPS + allowedRoutes: + namespaces: + from: All + tls: + mode: Terminate + certificateRefs: [] rbac: enabled: true From bd9b38dbfe7afbef3247d27cbda2a1c322271e5b Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Thu, 16 Apr 2026 14:16:03 +0200 Subject: [PATCH 43/58] Add certificateRef to Traefik HTTPS gateway listener Co-Authored-By: Claude Sonnet 4.6 --- .../ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml index 62ee6ae..b680bb8 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml @@ -207,7 +207,8 @@ from: All tls: mode: Terminate - certificateRefs: [] + certificateRefs: + - name: traefik-tls-secret rbac: enabled: true From f8ceadcd1b3736867888dd9679dbc94f58d951e5 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Thu, 16 Apr 2026 17:04:01 +0200 Subject: [PATCH 44/58] Switch from Gateway API to Traefik Ingress Controller - HelmChartConfig: enable kubernetesIngress, disable kubernetesGateway, remove all Gateway API config (gatewayClass, gateway, listeners) - cert-manager 
ClusterIssuers: use ingress HTTP-01 solver (class: traefik) instead of gatewayHTTPRoute solver - Remove shared Gateway resource creation from k3s_addons Co-Authored-By: Claude Sonnet 4.6 --- .../ansible/roles/k3s_addons/tasks/main.yml | 22 ++++--------- .../ansible/roles/k3s_master/tasks/main.yml | 32 ++----------------- 2 files changed, 9 insertions(+), 45 deletions(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml index 0f73d8d..20d7afd 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml @@ -145,12 +145,8 @@ name: letsencrypt-staging-key solvers: - http01: - gatewayHTTPRoute: - parentRefs: - - name: traefik - namespace: kube-system - kind: Gateway - group: gateway.networking.k8s.io + ingress: + class: traefik EOF changed_when: true @@ -169,12 +165,8 @@ name: letsencrypt-production-key solvers: - http01: - gatewayHTTPRoute: - parentRefs: - - name: traefik - namespace: kube-system - kind: Gateway - group: gateway.networking.k8s.io + ingress: + class: traefik EOF changed_when: true @@ -182,8 +174,7 @@ debug: msg: - "Hetzner CSI Driver installed successfully" - - "Traefik v3 Gateway Controller installed successfully" - - "Gateway API CRDs (v1.2.0) installed successfully" + - "Traefik Ingress Controller installed successfully" - "LoadBalancer created automatically by CCM" - "cert-manager v1.20.2 installed successfully" - "Let's Encrypt ClusterIssuers configured:" @@ -191,9 +182,8 @@ - " - letsencrypt-production (for production use)" - "" - "LoadBalancer IP: {{ lb_ip.stdout }}" - - "Gateway available at: http://{{ lb_ip.stdout }}" - "" - - "To use TLS certificates, annotate your Gateway resource with:" + - "To use TLS certificates, annotate your Ingress resource with:" - " cert-manager.io/cluster-issuer: letsencrypt-staging" - " or" - " 
cert-manager.io/cluster-issuer: letsencrypt-production" diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml index b680bb8..16c6ea9 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml @@ -152,7 +152,7 @@ delay: 10 changed_when: false -- name: Configure Traefik with Gateway API support +- name: Configure Traefik as Ingress Controller copy: dest: /var/lib/rancher/k3s/server/manifests/traefik-config.yaml mode: '0644' @@ -179,37 +179,11 @@ type: LoadBalancer providers: - kubernetesGateway: - enabled: true kubernetesIngress: + enabled: true + kubernetesGateway: enabled: false - gatewayClass: - enabled: true - name: traefik - - gateway: - enabled: true - annotations: - cert-manager.io/cluster-issuer: letsencrypt-production - listeners: - web: - port: 8000 - protocol: HTTP - allowedRoutes: - namespaces: - from: All - websecure: - port: 8443 - protocol: HTTPS - allowedRoutes: - namespaces: - from: All - tls: - mode: Terminate - certificateRefs: - - name: traefik-tls-secret - rbac: enabled: true changed_when: true From cc2affbc949db59d775270d6d61f0da13babd38a Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Thu, 16 Apr 2026 17:22:04 +0200 Subject: [PATCH 45/58] Remove Gateway API CRDs after Traefik install Traefik v3 Helm chart installs Gateway API CRDs via traefik-crd subchart even when kubernetesGateway is disabled. Cuby detects the CRDs and fails if no Gateway object exists. Delete them since we only use the Ingress controller. 
Co-Authored-By: Claude Sonnet 4.6 --- .../ansible/roles/k3s_addons/tasks/main.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml index 20d7afd..d60030b 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml @@ -56,6 +56,17 @@ delay: 10 changed_when: false +- name: Remove Gateway API CRDs (not needed for Ingress-only setup) + shell: | + kubectl delete crd \ + gateways.gateway.networking.k8s.io \ + gatewayclasses.gateway.networking.k8s.io \ + httproutes.gateway.networking.k8s.io \ + grpcroutes.gateway.networking.k8s.io \ + referencegrants.gateway.networking.k8s.io \ + --ignore-not-found + changed_when: true + - name: Annotate Traefik Service for Hetzner LoadBalancer shell: | kubectl annotate service traefik \ From 9f1cda0cae1fd17a9380b674275ddc12f5540010 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Wed, 22 Apr 2026 16:04:24 +0200 Subject: [PATCH 46/58] Fix trailing newline in SSH public key causing Terraform inconsistency Co-Authored-By: Claude Sonnet 4.6 --- hetzner-setup/ProcessCube.Cloud/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hetzner-setup/ProcessCube.Cloud/main.tf b/hetzner-setup/ProcessCube.Cloud/main.tf index f50ac01..ebb116e 100644 --- a/hetzner-setup/ProcessCube.Cloud/main.tf +++ b/hetzner-setup/ProcessCube.Cloud/main.tf @@ -27,7 +27,7 @@ provider "hcloud" { # SSH Key for accessing the servers resource "hcloud_ssh_key" "k3s" { name = "${var.cluster_name}-key" - public_key = file(var.ssh_public_key_path) + public_key = trimspace(file(var.ssh_public_key_path)) } # Network for the cluster From ec7713c32dd2e406fea05d1c0daeecbd256246ec Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Wed, 22 Apr 2026 16:58:36 +0200 Subject: [PATCH 47/58] Ignore known_hosts to 
prevent SSH timeout on server recreation Co-Authored-By: Claude Sonnet 4.6 --- hetzner-setup/ProcessCube.Cloud/ansible/ansible.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/ansible.cfg b/hetzner-setup/ProcessCube.Cloud/ansible/ansible.cfg index a6d9cb8..bd04725 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/ansible.cfg +++ b/hetzner-setup/ProcessCube.Cloud/ansible/ansible.cfg @@ -8,6 +8,6 @@ forks = 1 timeout = 60 [ssh_connection] -ssh_args = -o ControlMaster=auto -o ControlPersist=300s -o StrictHostKeyChecking=no -o ServerAliveInterval=30 -o ServerAliveCountMax=10 -o ConnectTimeout=30 +ssh_args = -o ControlMaster=auto -o ControlPersist=300s -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ServerAliveInterval=30 -o ServerAliveCountMax=10 -o ConnectTimeout=30 pipelining = True control_path = /tmp/ansible-ssh-%%h-%%p-%%r From 048f1ecd840ab9969b9016a09243dd8c5b316234 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Wed, 22 Apr 2026 17:08:06 +0200 Subject: [PATCH 48/58] Wait for SSH availability instead of static sleep before Ansible Co-Authored-By: Claude Sonnet 4.6 --- hetzner-setup/ProcessCube.Cloud/ansible/ansible.cfg | 2 +- hetzner-setup/ProcessCube.Cloud/main.tf | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/ansible.cfg b/hetzner-setup/ProcessCube.Cloud/ansible/ansible.cfg index bd04725..67fd705 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/ansible.cfg +++ b/hetzner-setup/ProcessCube.Cloud/ansible/ansible.cfg @@ -8,6 +8,6 @@ forks = 1 timeout = 60 [ssh_connection] -ssh_args = -o ControlMaster=auto -o ControlPersist=300s -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ServerAliveInterval=30 -o ServerAliveCountMax=10 -o ConnectTimeout=30 +ssh_args = -o ControlMaster=auto -o ControlPersist=300s -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ServerAliveInterval=30 -o 
ServerAliveCountMax=10 -o ConnectTimeout=60 pipelining = True control_path = /tmp/ansible-ssh-%%h-%%p-%%r diff --git a/hetzner-setup/ProcessCube.Cloud/main.tf b/hetzner-setup/ProcessCube.Cloud/main.tf index ebb116e..7a6071e 100644 --- a/hetzner-setup/ProcessCube.Cloud/main.tf +++ b/hetzner-setup/ProcessCube.Cloud/main.tf @@ -212,7 +212,13 @@ resource "local_file" "ansible_inventory" { # Wait for servers to be ready resource "null_resource" "wait_for_servers" { provisioner "local-exec" { - command = "sleep 90" + command = <<-EOT + for ip in ${hcloud_server.k3s_master.ipv4_address} ${join(" ", hcloud_server.k3s_worker[*].ipv4_address)}; do + echo "Waiting for SSH on $ip..." + timeout 300 bash -c "until nc -z -w5 $ip 22 2>/dev/null; do sleep 5; done" + echo "SSH ready on $ip" + done + EOT } depends_on = [ From 99ec35d185bb8f1d06b3b71e5dcbad6aab486870 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Thu, 23 Apr 2026 09:25:18 +0200 Subject: [PATCH 49/58] Use ssh-keyscan for SSH readiness check instead of nc Co-Authored-By: Claude Sonnet 4.6 --- hetzner-setup/ProcessCube.Cloud/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hetzner-setup/ProcessCube.Cloud/main.tf b/hetzner-setup/ProcessCube.Cloud/main.tf index 7a6071e..5885642 100644 --- a/hetzner-setup/ProcessCube.Cloud/main.tf +++ b/hetzner-setup/ProcessCube.Cloud/main.tf @@ -215,7 +215,7 @@ resource "null_resource" "wait_for_servers" { command = <<-EOT for ip in ${hcloud_server.k3s_master.ipv4_address} ${join(" ", hcloud_server.k3s_worker[*].ipv4_address)}; do echo "Waiting for SSH on $ip..." 
- timeout 300 bash -c "until nc -z -w5 $ip 22 2>/dev/null; do sleep 5; done" + timeout 300 bash -c "until ssh-keyscan -T 10 -p 22 $ip 2>/dev/null | grep -q ssh; do sleep 5; done" echo "SSH ready on $ip" done EOT From 54b6283c51cc62386edeb0dc60619a0837c9b148 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 4 May 2026 17:14:25 +0200 Subject: [PATCH 50/58] Fix LoadBalancer not provisioned by embedding Hetzner annotations in Traefik HelmChartConfig Annotating the Traefik service after creation caused a CCM reconciliation race condition (location annotation conflicts with an already-provisioned LB). Moving annotations into the HelmChartConfig ensures the service starts with the correct Hetzner annotations so the CCM can provision the LoadBalancer correctly on first reconcile. Also adds diagnostic output (CCM logs + kubectl describe) when the LB wait times out, so failures are easier to debug. Co-Authored-By: Claude Sonnet 4.6 --- .../ansible/roles/k3s_addons/tasks/main.yml | 35 +++++++++++++++++-- .../ansible/roles/k3s_master/tasks/main.yml | 8 +++++ 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml index d60030b..47af03a 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml @@ -67,7 +67,7 @@ --ignore-not-found changed_when: true -- name: Annotate Traefik Service for Hetzner LoadBalancer +- name: Ensure Traefik Service annotations are present (overwrite in case HelmChart reset them) shell: | kubectl annotate service traefik \ -n kube-system \ @@ -75,7 +75,6 @@ load-balancer.hetzner.cloud/use-private-ip="true" \ load-balancer.hetzner.cloud/uses-proxyprotocol="false" \ load-balancer.hetzner.cloud/name="{{ cluster_name }}-lb" \ - load-balancer.hetzner.cloud/node-selector="loadbalancer-target=true" \ 
load-balancer.hetzner.cloud/health-check-interval="10s" \ load-balancer.hetzner.cloud/health-check-timeout="5s" \ load-balancer.hetzner.cloud/health-check-retries="3" \ @@ -84,7 +83,7 @@ - name: Wait a moment for LoadBalancer to start provisioning pause: - seconds: 15 + seconds: 30 - name: Wait for LoadBalancer to be provisioned shell: kubectl get svc traefik -n kube-system -o jsonpath='{.status.loadBalancer.ingress[0].ip}' @@ -93,6 +92,36 @@ retries: 60 delay: 10 changed_when: false + ignore_errors: true + +- name: Collect CCM logs on LoadBalancer provisioning failure + shell: kubectl logs -n kube-system -l app=hcloud-cloud-controller-manager --tail=100 + register: ccm_logs_on_failure + changed_when: false + ignore_errors: true + when: lb_ip.stdout == "" + +- name: Show CCM logs on failure + debug: + msg: "{{ ccm_logs_on_failure.stdout_lines }}" + when: lb_ip.stdout == "" + +- name: Show Traefik service events on failure + shell: kubectl describe svc traefik -n kube-system + register: traefik_svc_describe + changed_when: false + ignore_errors: true + when: lb_ip.stdout == "" + +- name: Display Traefik service description on failure + debug: + msg: "{{ traefik_svc_describe.stdout_lines }}" + when: lb_ip.stdout == "" + +- name: Fail if LoadBalancer was not provisioned + fail: + msg: "LoadBalancer IP was not assigned after 10 minutes. See CCM logs and service events above for details." 
+ when: lb_ip.stdout == "" - name: Display LoadBalancer IP debug: diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml index 16c6ea9..25d0720 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml @@ -177,6 +177,14 @@ service: type: LoadBalancer + annotations: + load-balancer.hetzner.cloud/location: "{{ location }}" + load-balancer.hetzner.cloud/name: "{{ cluster_name }}-lb" + load-balancer.hetzner.cloud/use-private-ip: "true" + load-balancer.hetzner.cloud/uses-proxyprotocol: "false" + load-balancer.hetzner.cloud/health-check-interval: "10s" + load-balancer.hetzner.cloud/health-check-timeout: "5s" + load-balancer.hetzner.cloud/health-check-retries: "3" providers: kubernetesIngress: From 497290339755e4e7ff4a505839a60048562da979 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 4 May 2026 17:37:11 +0200 Subject: [PATCH 51/58] Fix worker node join reliability and improve cluster verify diagnostics k3s-agent sometimes takes longer to stabilize after install. Replace direct systemd start with a wait-loop (18x10s) that verifies `systemctl is-active` and collects journalctl logs on failure, so flaky joins are retried and failures are visible in the output. Fix Verify Cluster to use awk for exact Ready-state matching instead of grep -v NotReady, and add kubectl describe output for any NotReady nodes to make failures self-diagnosing. 
Co-Authored-By: Claude Sonnet 4.6 --- .../ansible/roles/k3s_worker/tasks/main.yml | 28 +++++++++++++++++++ .../ProcessCube.Cloud/ansible/site.yml | 24 +++++++++++++++- 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml index 365d064..f25f457 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml @@ -103,6 +103,34 @@ enabled: yes state: started daemon_reload: yes + register: agent_service_start + ignore_errors: true + +- name: Wait for K3s agent to become active + shell: systemctl is-active k3s-agent + register: agent_active + until: agent_active.stdout == "active" + retries: 18 + delay: 10 + changed_when: false + ignore_errors: true + +- name: Collect k3s-agent logs if not active + shell: journalctl -xeu k3s-agent --no-pager -n 50 + register: agent_logs + changed_when: false + ignore_errors: true + when: agent_active.stdout != "active" + +- name: Show k3s-agent logs on failure + debug: + msg: "{{ agent_logs.stdout_lines }}" + when: agent_active.stdout != "active" + +- name: Fail if K3s agent is not active + fail: + msg: "K3s agent service failed to start. Check logs above." 
+ when: agent_active.stdout != "active" - name: Display K3s worker installation info debug: diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/site.yml b/hetzner-setup/ProcessCube.Cloud/ansible/site.yml index b4bb105..ddb3967 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/site.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/site.yml @@ -121,12 +121,13 @@ become: yes tasks: - name: Wait for all nodes to be ready - shell: kubectl get nodes --no-headers | grep -v NotReady | wc -l + shell: kubectl get nodes --no-headers | awk '$2 == "Ready" {count++} END {print count+0}' register: ready_nodes until: ready_nodes.stdout|int == groups['k3s_cluster']|length retries: 30 delay: 10 changed_when: false + ignore_errors: true - name: Display cluster status shell: kubectl get nodes @@ -136,3 +137,24 @@ - name: Show cluster nodes debug: var: cluster_status.stdout_lines + + - name: Describe NotReady nodes for diagnostics + shell: | + for node in $(kubectl get nodes --no-headers | awk '$2 != "Ready" {print $1}'); do + echo "=== Node: $node ===" + kubectl describe node "$node" | tail -30 + done + register: notready_nodes_info + changed_when: false + ignore_errors: true + when: ready_nodes.stdout|int < groups['k3s_cluster']|length + + - name: Show NotReady node details + debug: + msg: "{{ notready_nodes_info.stdout_lines }}" + when: ready_nodes.stdout|int < groups['k3s_cluster']|length + + - name: Fail if not all nodes are ready + fail: + msg: "Only {{ ready_nodes.stdout }} of {{ groups['k3s_cluster']|length }} nodes are Ready. See node details above." + when: ready_nodes.stdout|int < groups['k3s_cluster']|length From ab70c672bb4f8e4f0b1e1d49f632991868e0f96f Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 4 May 2026 17:52:29 +0200 Subject: [PATCH 52/58] Fix network zone mismatch causing LoadBalancer provisioning failure Hardcoded network_zone = "eu-central" does not always match the actual zone of the configured location (e.g. hel1 may be in a different zone). 
Use a hcloud_location data source to derive the correct network_zone from var.location, ensuring the private network subnet always matches the LB location and the CCM can attach the LoadBalancer to the network. Co-Authored-By: Claude Sonnet 4.6 --- hetzner-setup/ProcessCube.Cloud/main.tf | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hetzner-setup/ProcessCube.Cloud/main.tf b/hetzner-setup/ProcessCube.Cloud/main.tf index 5885642..55c9893 100644 --- a/hetzner-setup/ProcessCube.Cloud/main.tf +++ b/hetzner-setup/ProcessCube.Cloud/main.tf @@ -31,6 +31,10 @@ resource "hcloud_ssh_key" "k3s" { } # Network for the cluster +data "hcloud_location" "cluster" { + name = var.location +} + resource "hcloud_network" "k3s" { name = "${var.cluster_name}-network" ip_range = "10.0.0.0/16" @@ -39,7 +43,7 @@ resource "hcloud_network" "k3s" { resource "hcloud_network_subnet" "k3s" { network_id = hcloud_network.k3s.id type = "cloud" - network_zone = "eu-central" + network_zone = data.hcloud_location.cluster.network_zone ip_range = "10.0.1.0/24" lifecycle { From 22569b516eb87beb626c94ef5b90bd93066b8089 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Tue, 5 May 2026 13:36:31 +0200 Subject: [PATCH 53/58] Fix deprecated ingress class field in ClusterIssuer HTTP01 solvers Replace deprecated `class: traefik` with `ingressClassName: traefik` in both letsencrypt-staging and letsencrypt-production ClusterIssuers. 
Co-Authored-By: Claude Sonnet 4.6 --- .../ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml index 47af03a..d41a455 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml @@ -186,7 +186,7 @@ solvers: - http01: ingress: - class: traefik + ingressClassName: traefik EOF changed_when: true @@ -206,7 +206,7 @@ solvers: - http01: ingress: - class: traefik + ingressClassName: traefik EOF changed_when: true From 6e49abb4d47e50e239e628b092de431c7516178f Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Sat, 9 May 2026 09:41:57 +0200 Subject: [PATCH 54/58] Update hcloud-cloud-controller-manager to v1.31.0 Bump default from v1.20.0 to latest stable v1.31.0. Co-Authored-By: Claude Sonnet 4.6 --- hetzner-setup/ProcessCube.Cloud/variables.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hetzner-setup/ProcessCube.Cloud/variables.tf b/hetzner-setup/ProcessCube.Cloud/variables.tf index 1c356ae..41ae35a 100644 --- a/hetzner-setup/ProcessCube.Cloud/variables.tf +++ b/hetzner-setup/ProcessCube.Cloud/variables.tf @@ -46,7 +46,7 @@ variable "hcloud_csi_version" { variable "hcloud_ccm_version" { description = "Hetzner Cloud Controller Manager version" type = string - default = "v1.20.0" + default = "v1.31.0" } variable "ssh_public_key_path" { From 21d56a1de2f9e124694d34a139fb09c29aceec82 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Sat, 9 May 2026 09:43:24 +0200 Subject: [PATCH 55/58] Handle breaking ClusterRoleBinding migration for CCM v1.28.0+ Delete the old ClusterRoleBinding system:hcloud-cloud-controller-manager before applying the manifest, as v1.28.0 renamed it to the :restricted suffix. 
The roleRef field is immutable, so kubectl apply would fail on existing clusters without this cleanup step. Co-Authored-By: Claude Sonnet 4.6 --- .../ProcessCube.Cloud/ansible/roles/k3s_ccm/tasks/main.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_ccm/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_ccm/tasks/main.yml index 99aea45..a4f2524 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_ccm/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_ccm/tasks/main.yml @@ -22,6 +22,10 @@ dest: /tmp/hcloud-ccm.yaml mode: '0644' +- name: Remove deprecated CCM ClusterRoleBinding (breaking change in v1.28.0) + shell: kubectl delete clusterrolebinding system:hcloud-cloud-controller-manager --ignore-not-found + changed_when: false + - name: Install Hetzner Cloud Controller Manager shell: kubectl apply -f /tmp/hcloud-ccm.yaml register: ccm_install From 648d3ae7fc8f770ecf9572be0fc9b13cf042abe0 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 11 May 2026 18:21:52 +0200 Subject: [PATCH 56/58] Fix LoadBalancer network annotation to use cluster_name-specific network Without an explicit network annotation, the Hetzner CCM could attach the LoadBalancer to the wrong network (e.g. an existing processcube-cluster network) when multiple clusters share the same Hetzner project. 
Co-Authored-By: Claude Sonnet 4.6 --- .../ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml | 3 ++- .../ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml index d41a455..0754f48 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_addons/tasks/main.yml @@ -72,9 +72,10 @@ kubectl annotate service traefik \ -n kube-system \ load-balancer.hetzner.cloud/location="{{ location }}" \ + load-balancer.hetzner.cloud/name="{{ cluster_name }}-lb" \ + load-balancer.hetzner.cloud/network="{{ cluster_name }}-network" \ load-balancer.hetzner.cloud/use-private-ip="true" \ load-balancer.hetzner.cloud/uses-proxyprotocol="false" \ - load-balancer.hetzner.cloud/name="{{ cluster_name }}-lb" \ load-balancer.hetzner.cloud/health-check-interval="10s" \ load-balancer.hetzner.cloud/health-check-timeout="5s" \ load-balancer.hetzner.cloud/health-check-retries="3" \ diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml index 25d0720..84451e8 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml @@ -180,6 +180,7 @@ annotations: load-balancer.hetzner.cloud/location: "{{ location }}" load-balancer.hetzner.cloud/name: "{{ cluster_name }}-lb" + load-balancer.hetzner.cloud/network: "{{ cluster_name }}-network" load-balancer.hetzner.cloud/use-private-ip: "true" load-balancer.hetzner.cloud/uses-proxyprotocol: "false" load-balancer.hetzner.cloud/health-check-interval: "10s" From ed17e40facbf6b8211f5e245432b71bcbc22ad76 Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Mon, 11 May 2026 
18:36:43 +0200 Subject: [PATCH 57/58] Disable multipathd on cluster nodes to fix Hetzner CSI volume mounting On Ubuntu 24.04, multipathd is installed by default and intercepts new SCSI devices before udev can create the /dev/disk/by-id/scsi-0HC_Volume_* symlinks that the Hetzner CSI driver depends on. This causes FailedMount errors with "The file does not exist" even when Hetzner reports the volume as successfully attached. Co-Authored-By: Claude Sonnet 4.6 --- .../ansible/roles/k3s_master/tasks/main.yml | 6 ++++++ .../ansible/roles/k3s_worker/tasks/main.yml | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml index 84451e8..877d366 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_master/tasks/main.yml @@ -17,6 +17,12 @@ sed -i '/ swap / s/^/#/' /etc/fstab changed_when: false +- name: Disable multipathd to prevent interference with Hetzner CSI volumes + shell: | + systemctl stop multipathd.service multipathd.socket 2>/dev/null || true + systemctl disable multipathd.service multipathd.socket 2>/dev/null || true + changed_when: false + - name: Configure firewall rules shell: | ufw allow 22/tcp diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml index f25f457..fbd2ad4 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml @@ -15,6 +15,12 @@ sed -i '/ swap / s/^/#/' /etc/fstab changed_when: false +- name: Disable multipathd to prevent interference with Hetzner CSI volumes + shell: | + systemctl stop multipathd.service multipathd.socket 2>/dev/null || true + systemctl disable multipathd.service multipathd.socket 
2>/dev/null || true + changed_when: false + - name: Configure firewall rules shell: | ufw allow 22/tcp From e3a157ac1c0aa178ed52cf84400b7b122a100fff Mon Sep 17 00:00:00 2001 From: Robin Lenz Date: Tue, 30 Jun 2026 11:02:42 +0200 Subject: [PATCH 58/58] Harden worker private-network wait and fix CCM cluster CIDR Workers timed out for 300s on the master API (10.0.1.2:6443) when the private interface enp7s0 had not yet received its 10.0.x.x address, since the wait ran before any interface check. Add a pre-wait task that ensures enp7s0 is up with a cluster-subnet IP (best-effort link up + dhclient), emit interface/route diagnostics on failure, and replace the silent 300s wait_for with a 120s one carrying an actionable message. Also align the CCM manifest cluster CIDR (10.244.0.0/16) with the K3s default (10.42.0.0/16) to remove the route-controller CIDR-mismatch warning. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01DZPTZ6d76eLgCpw8FheVgq --- .../ansible/roles/k3s_ccm/tasks/main.yml | 6 ++++ .../ansible/roles/k3s_worker/tasks/main.yml | 34 +++++++++++++++++-- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_ccm/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_ccm/tasks/main.yml index a4f2524..4f75c32 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_ccm/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_ccm/tasks/main.yml @@ -22,6 +22,12 @@ dest: /tmp/hcloud-ccm.yaml mode: '0644' +- name: Align CCM cluster CIDR with K3s default (10.42.0.0/16) + replace: + path: /tmp/hcloud-ccm.yaml + regexp: '10\.244\.0\.0/16' + replace: '10.42.0.0/16' + - name: Remove deprecated CCM ClusterRoleBinding (breaking change in v1.28.0) shell: kubectl delete clusterrolebinding system:hcloud-cloud-controller-manager --ignore-not-found changed_when: false diff --git 
a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml index fbd2ad4..86f4b95 100644 --- a/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml +++ b/hetzner-setup/ProcessCube.Cloud/ansible/roles/k3s_worker/tasks/main.yml @@ -33,11 +33,41 @@ ufw default deny incoming ufw --force enable -- name: Wait for master node to be ready +- name: Ensure private network interface has an IP in the cluster subnet + shell: | + for i in $(seq 1 30); do + if ip -4 addr show enp7s0 2>/dev/null | grep -qoP '(?<=inet\s)10\.0\.'; then + exit 0 + fi + ip link set enp7s0 up 2>/dev/null || true + dhclient -1 enp7s0 2>/dev/null || true + sleep 5 + done + echo "enp7s0 never received a 10.0.x.x address" >&2 + exit 1 + register: private_iface_up + changed_when: false + +- name: Show private interface state on failure + shell: ip -4 addr show enp7s0; echo '--- routes ---'; ip route + register: iface_debug + changed_when: false + when: private_iface_up.rc != 0 + +- name: Display private interface diagnostics + debug: + msg: "{{ iface_debug.stdout_lines }}" + when: private_iface_up.rc != 0 + +- name: Wait for master node API to be reachable over the private network wait_for: host: "{{ master_ip }}" port: 6443 - timeout: 300 + timeout: 120 + msg: >- + Cannot reach the master API at {{ master_ip }}:6443 over the private network. + Check that enp7s0 has a 10.0.x.x address (see diagnostics above) and that the + master's firewall allows traffic from 10.0.0.0/16. - name: Check if K3s is already installed stat: