diff --git a/internal/bootstrap/gcp/gce.go b/internal/bootstrap/gcp/gce.go index a7b7fc66..82716a85 100644 --- a/internal/bootstrap/gcp/gce.go +++ b/internal/bootstrap/gcp/gce.go @@ -33,9 +33,9 @@ var vmDefs = []VMDef{ {"ceph-1", "e2-standard-8", []string{"ceph"}, []int64{10, 100}, false}, {"ceph-2", "e2-standard-8", []string{"ceph"}, []int64{10, 100}, false}, {"ceph-3", "e2-standard-8", []string{"ceph"}, []int64{10, 100}, false}, - {"k0s-1", "e2-standard-8", []string{"k0s"}, []int64{}, false}, - {"k0s-2", "e2-standard-8", []string{"k0s"}, []int64{}, false}, - {"k0s-3", "e2-standard-8", []string{"k0s"}, []int64{}, false}, + {"k0s-1", "e2-standard-8", []string{"k0s"}, []int64{100}, false}, + {"k0s-2", "e2-standard-8", []string{"k0s"}, []int64{100}, false}, + {"k0s-3", "e2-standard-8", []string{"k0s"}, []int64{100}, false}, } // validateVMProvisioningOptions checks that spot and preemptible options are not both set diff --git a/internal/bootstrap/gcp/gcp.go b/internal/bootstrap/gcp/gcp.go index f91e56ce..ee1187b9 100644 --- a/internal/bootstrap/gcp/gcp.go +++ b/internal/bootstrap/gcp/gcp.go @@ -290,6 +290,11 @@ func (b *GCPBootstrapper) Bootstrap() error { return fmt.Errorf("failed to ensure hosts are configured: %w", err) } + err = b.stlog.Step("Ensure etcd disks mounted", b.EnsureEtcdDisksMounted) + if err != nil { + return fmt.Errorf("failed to ensure etcd disks are mounted: %w", err) + } + if b.Env.RegistryType == RegistryTypeLocalContainer { err = b.stlog.Step("Ensure local container registry", b.EnsureLocalContainerRegistry) if err != nil { @@ -332,8 +337,7 @@ func (b *GCPBootstrapper) Bootstrap() error { } if b.Env.InstallVersion != "" || b.Env.InstallLocal != "" { - err = b.stlog.Step("Install Codesphere", b.InstallCodesphere) - if err != nil { + if err = b.stlog.Step("Install Codesphere", b.InstallCodesphere); err != nil { return fmt.Errorf("failed to install Codesphere: %w", err) } @@ -832,6 +836,44 @@ func (b *GCPBootstrapper) EnsureHostsConfigured() error { return nil } +// EnsureEtcdDisksMounted formats and mounts the dedicated etcd disk (/dev/sdb) on each control +// plane node at /var/lib/k0s/etcd. The disk is persisted via /etc/fstab using its UUID. +// This must run before k0s is installed so etcd writes land on the dedicated PD-SSD. +func (b *GCPBootstrapper) EnsureEtcdDisksMounted() error { + for _, n := range b.Env.ControlPlaneNodes { + // Idempotency check: skip if already mounted. + if err := n.RunSSHCommand("root", "mountpoint -q /var/lib/k0s/etcd"); err == nil { + b.stlog.Logf("etcd disk already mounted on %s, skipping", n.GetName()) + continue + } + + // Format /dev/sdb with ext4 if it has no filesystem yet. + if err := n.RunSSHCommand("root", "blkid -s TYPE -o value /dev/sdb | grep -q ext4"); err != nil { + b.stlog.Logf("Formatting etcd disk on %s", n.GetName()) + if err := n.RunSSHCommand("root", "mkfs.ext4 -F /dev/sdb"); err != nil { + return fmt.Errorf("failed to format etcd disk on %s: %w", n.GetName(), err) + } + } + + // Create mount point directory. + if err := n.RunSSHCommand("root", "mkdir -p /var/lib/k0s/etcd"); err != nil { + return fmt.Errorf("failed to create etcd mount point on %s: %w", n.GetName(), err) + } + + // Register in /etc/fstab by UUID (survives reboots) and mount. + fstabAndMount := `DISK_UUID=$(blkid -s UUID -o value /dev/sdb) && ` + + `grep -qF "UUID=$DISK_UUID" /etc/fstab || ` + + `echo "UUID=$DISK_UUID /var/lib/k0s/etcd ext4 defaults,noatime 0 2" >> /etc/fstab && ` + + `mount /var/lib/k0s/etcd` + if err := n.RunSSHCommand("root", fstabAndMount); err != nil { + return fmt.Errorf("failed to mount etcd disk on %s: %w", n.GetName(), err) + } + + b.stlog.Logf("etcd disk mounted at /var/lib/k0s/etcd on %s", n.GetName()) + } + return nil +} + // EnsureLocalContainerRegistry installs a docker registry on the postgres node to speed up image loading time func (b *GCPBootstrapper) EnsureLocalContainerRegistry() error { localRegistryServer := b.Env.PostgreSQLNode.GetInternalIP() + ":5000" @@ -1031,10 +1073,10 @@ func (b *GCPBootstrapper) GenerateK0sConfigScript() error { cat < cloud.conf [Global] -project-id = "$PROJECT_ID" +project-id = "` + b.Env.ProjectID + `" EOF -cat <> cc-deployment.yaml +cat < cc-deployment.yaml apiVersion: apps/v1 kind: DaemonSet metadata: @@ -1085,7 +1127,7 @@ spec: EOF KUBECTL="/etc/codesphere/deps/kubernetes/files/k0s kubectl" -$KUBECTL create configmap cloud-config --from-file=cloud.conf -n kube-system +$KUBECTL create configmap cloud-config --from-file=cloud.conf -n kube-system --dry-run=client -o yaml | $KUBECTL apply -f - echo alias kubectl=\"$KUBECTL\" >> /root/.bashrc echo alias k=\"$KUBECTL\" >> /root/.bashrc @@ -1097,11 +1139,11 @@ $KUBECTL apply -f cc-deployment.yaml $KUBECTL patch svc public-gateway-controller -n codesphere -p '{"spec": {"loadBalancerIP": "'` + b.Env.PublicGatewayIP + `'"}}' $KUBECTL patch svc gateway-controller -n codesphere -p '{"spec": {"loadBalancerIP": "'` + b.Env.GatewayIP + `'"}}' -sed -i 's/k0scontroller/k0scontroller --enable-cloud-provider/g' /etc/systemd/system/k0scontroller.service +grep -qF -- --enable-cloud-provider /etc/systemd/system/k0scontroller.service || sed -i '/ExecStart=/s/$/ --enable-cloud-provider/' /etc/systemd/system/k0scontroller.service -ssh -o StrictHostKeyChecking=no root@` + b.Env.ControlPlaneNodes[1].GetInternalIP() + ` "sed -i 's/k0sworker/k0sworker --enable-cloud-provider/g' /etc/systemd/system/k0sworker.service; systemctl daemon-reload; systemctl restart k0sworker" +ssh -o StrictHostKeyChecking=no -o ConnectTimeout=60 root@` + b.Env.ControlPlaneNodes[1].GetInternalIP() + ` "grep -qF -- --enable-cloud-provider /etc/systemd/system/k0sworker.service || sed -i '/ExecStart=/s/\$/ --enable-cloud-provider/' /etc/systemd/system/k0sworker.service; systemctl daemon-reload; systemctl restart k0sworker" || true -ssh -o StrictHostKeyChecking=no root@` + b.Env.ControlPlaneNodes[2].GetInternalIP() + ` "sed -i 's/k0sworker/k0sworker --enable-cloud-provider/g' /etc/systemd/system/k0sworker.service; systemctl daemon-reload; systemctl restart k0sworker" +ssh -o StrictHostKeyChecking=no -o ConnectTimeout=60 root@` + b.Env.ControlPlaneNodes[2].GetInternalIP() + ` "grep -qF -- --enable-cloud-provider /etc/systemd/system/k0sworker.service || sed -i '/ExecStart=/s/\$/ --enable-cloud-provider/' /etc/systemd/system/k0sworker.service; systemctl daemon-reload; systemctl restart k0sworker" || true systemctl daemon-reload systemctl restart k0scontroller diff --git a/internal/bootstrap/gcp/gcp_test.go b/internal/bootstrap/gcp/gcp_test.go index 83f5d4ec..2f6947ac 100644 --- a/internal/bootstrap/gcp/gcp_test.go +++ b/internal/bootstrap/gcp/gcp_test.go @@ -605,16 +605,16 @@ var _ = Describe("GCP Bootstrapper", func() { Expect(err).To(MatchError(ContainSubstring("prometheus remote write username and password must both be set when remote write URL is specified"))) }) }) - Context("When Prometheus remote write URL is set but only password is missing", func() { - BeforeEach(func() { - csEnv.PrometheusRemoteWriteURL = "https://prometheus.example.com/api/v1/write" - csEnv.PrometheusRemoteWriteUser = "prom-user" - }) - It("returns an error", func() { - err := bs.ValidateInput() - Expect(err).To(MatchError(ContainSubstring("prometheus remote write username and password must both be set when remote write URL is specified"))) - }) - }) + Context("When Prometheus remote write URL is set but only password is missing", func() { + BeforeEach(func() { + csEnv.PrometheusRemoteWriteURL = "https://prometheus.example.com/api/v1/write" + csEnv.PrometheusRemoteWriteUser = "prom-user" + }) + It("returns an error", func() { + err := bs.ValidateInput() + Expect(err).To(MatchError(ContainSubstring("prometheus remote write username and password must both be set when remote write URL is specified"))) + }) + }) Context("When Prometheus remote write credentials are set but URL is missing", func() { BeforeEach(func() { csEnv.PrometheusRemoteWriteUser = "prom-user"