diff --git a/hcloud/cloud.go b/hcloud/cloud.go index 621ee7a1..ad923575 100644 --- a/hcloud/cloud.go +++ b/hcloud/cloud.go @@ -28,6 +28,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/client-go/kubernetes/scheme" v1 "k8s.io/client-go/kubernetes/typed/core/v1" + corelisters "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/tools/record" cloudprovider "k8s.io/cloud-provider" "k8s.io/klog/v2" @@ -55,9 +56,10 @@ type cloud struct { recorder record.EventRecorder networkID int64 cidr string + nodeLister corelisters.NodeLister } -func NewCloud(cidr string) (cloudprovider.Interface, error) { +func NewCloud(cidr string, nodeLister corelisters.NodeLister) (cloudprovider.Interface, error) { const op = "hcloud/newCloud" metrics.OperationCalled.WithLabelValues(op).Inc() @@ -147,6 +149,7 @@ func NewCloud(cidr string) (cloudprovider.Interface, error) { cfg: cfg, networkID: networkID, cidr: cidr, + nodeLister: nodeLister, }, nil } @@ -213,6 +216,7 @@ func (c *cloud) Routes() (cloudprovider.Routes, bool) { c.networkID, c.cidr, c.recorder, + c.nodeLister, ) if err != nil { klog.ErrorS(err, "create routes provider", "networkID", c.networkID) diff --git a/hcloud/cloud_test.go b/hcloud/cloud_test.go index 71634ee5..28e363c9 100644 --- a/hcloud/cloud_test.go +++ b/hcloud/cloud_test.go @@ -95,7 +95,7 @@ func TestNewCloud(t *testing.T) { json.NewEncoder(w).Encode(schema.LocationListResponse{Locations: []schema.Location{}}) }) - _, err := NewCloud(DefaultClusterCIDR) + _, err := NewCloud(DefaultClusterCIDR, nil) assert.NoError(t, err) } @@ -107,7 +107,7 @@ func TestNewCloudConnectionNotPossible(t *testing.T) { ) defer resetEnv() - _, err := NewCloud(DefaultClusterCIDR) + _, err := NewCloud(DefaultClusterCIDR, nil) assert.EqualError(t, err, `hcloud/newCloud: Get "http://127.0.0.1:4711/v1/locations?": dial tcp 127.0.0.1:4711: connect: connection refused`) } @@ -135,7 +135,7 @@ func TestNewCloudInvalidToken(t *testing.T) { ) }) - _, err := NewCloud(DefaultClusterCIDR) + _, err := NewCloud(DefaultClusterCIDR, nil) assert.EqualError(t, err, "hcloud/newCloud: unable to authenticate (unauthorized)") } @@ -172,7 +172,7 @@ func TestCloud(t *testing.T) { ) }) - cloud, err := NewCloud(DefaultClusterCIDR) + cloud, err := NewCloud(DefaultClusterCIDR, nil) if err != nil { t.Fatalf("Unexpected error: %v", err) } @@ -229,7 +229,7 @@ func TestCloud(t *testing.T) { ) defer resetEnv() - c, err := NewCloud(DefaultClusterCIDR) + c, err := NewCloud(DefaultClusterCIDR, nil) if err != nil { t.Errorf("%s", err) } diff --git a/hcloud/routes.go b/hcloud/routes.go index 37552330..522c93ea 100644 --- a/hcloud/routes.go +++ b/hcloud/routes.go @@ -5,19 +5,21 @@ import ( "errors" "fmt" "net" + "slices" "time" "golang.org/x/time/rate" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + corelisters "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/tools/record" cloudprovider "k8s.io/cloud-provider" "k8s.io/klog/v2" "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/hcops" "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/metrics" + "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/providerid" "github.com/hetznercloud/hcloud-go/v2/hcloud" ) @@ -29,9 +31,10 @@ type routes struct { serverCache *hcops.AllServersCache clusterCIDR *net.IPNet recorder record.EventRecorder + nodeLister corelisters.NodeLister } -func newRoutes(client *hcloud.Client, networkID int64, clusterCIDR string, recorder record.EventRecorder) (*routes, error) { +func newRoutes(client *hcloud.Client, networkID int64, clusterCIDR string, recorder record.EventRecorder, nodeLister corelisters.NodeLister) (*routes, error) { const op = "hcloud/newRoutes" metrics.OperationCalled.WithLabelValues(op).Inc() @@ -60,6 +63,7 @@ func newRoutes(client *hcloud.Client, networkID int64, clusterCIDR string, recor }, clusterCIDR: cidr, recorder: recorder, + nodeLister: nodeLister, }, nil } @@ -105,77 +109,123 @@ func (r *routes) CreateRoute(ctx context.Context, _ string, _ string, route *clo const op = "hcloud/CreateRoute" metrics.OperationCalled.WithLabelValues(op).Inc() - srv, err := r.serverCache.ByName(ctx, string(route.TargetNode)) + node, gateway, err := r.resolveRouteTarget(ctx, string(route.TargetNode)) if err != nil { - return fmt.Errorf("%s: %w", op, err) + return fmt.Errorf("%s: error resolving route target: %w", op, err) } - privNet, ok := findServerPrivateNetByID(srv, r.network.ID) - if !ok { - r.serverCache.InvalidateCache() - srv, err = r.serverCache.ByName(ctx, string(route.TargetNode)) - if err != nil { - return fmt.Errorf("%s: %w", op, err) - } - - privNet, ok = findServerPrivateNetByID(srv, r.network.ID) - if !ok { - return fmt.Errorf("%s: server %v: network with id %d not attached to this server", op, route.TargetNode, r.network.ID) - } + if !slices.ContainsFunc(route.TargetNodeAddresses, func(target corev1.NodeAddress) bool { + return target.Type == corev1.NodeInternalIP && target.Address == gateway.String() + }) { + return fmt.Errorf("%s: IP %s not part of routes target addresses", op, gateway.String()) } - ip := privNet.IP _, cidr, err := net.ParseCIDR(route.DestinationCIDR) if err != nil { return fmt.Errorf("%s: %w", op, err) } - clusterPrefixLen, _ := r.clusterCIDR.Mask.Size() - destPrefixLen, _ := cidr.Mask.Size() + r.warnCIDRMismatch(cidr, node) - if !r.clusterCIDR.Contains(cidr.IP) || destPrefixLen < clusterPrefixLen { - node := &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: string(route.TargetNode), - Namespace: "", - }, - } - // Event is only visible via `kubectl get events` and not `kubectl describe node`, - // as we do not have the UID here and `kubectl describe node` filters by UID. - // Because of this behavior we are also dispatching a log message. - r.recorder.Eventf( - node, - corev1.EventTypeWarning, - "ClusterCIDRMisconfigured", - "route CIDR %s is not contained within cluster CIDR %s", - route.DestinationCIDR, - r.clusterCIDR.String(), - ) - klog.Warningf( - "route CIDR %s is not contained within cluster CIDR %s", - route.DestinationCIDR, - r.clusterCIDR.String(), - ) + if err := r.upsertRoute(ctx, gateway, cidr, string(route.TargetNode)); err != nil { + return fmt.Errorf("error upserting route %q via %q: %w", cidr.String(), gateway.String(), err) } - routeExists, err := r.checkIfRouteAlreadyExists(ctx, route) + return nil +} + +// resolveRouteTarget returns the k8s node and the hcloud server's private IP on the routes +// network — everything needed to create a route for this node (gateway IP) and record events +// against it (node). +// Looks up the server by ProviderID to survive k8s node-name drift, with a ByName fallback for +// ID changes (e.g. server recreated). Refreshes the cache once if the private-net attachment +// isn't yet reflected. +func (r *routes) resolveRouteTarget(ctx context.Context, nodeName string) (*corev1.Node, net.IP, error) { + node, err := r.nodeLister.Get(nodeName) if err != nil { - return fmt.Errorf("%s: %w", op, err) + return nil, nil, fmt.Errorf("error fetching node %s by name: %w", nodeName, err) + } + + if node.Spec.ProviderID == "" { + return nil, nil, fmt.Errorf("node %s not yet initialized", node.Name) + } + + id, isCloudServer, err := providerid.ToServerID(node.Spec.ProviderID) + if err != nil { + return nil, nil, fmt.Errorf("error parsing provider id %q for node %s: %w", node.Spec.ProviderID, nodeName, err) + } + if !isCloudServer { + return nil, nil, fmt.Errorf("node %s is not a cloud server, routes are only supported for cloud servers", node.Name) + } + + server, err := r.serverCache.ByID(ctx, id) + if errors.Is(err, hcops.ErrNotFound) { + server, err = r.serverCache.ByName(ctx, node.Name) + } + if err != nil { + return nil, nil, fmt.Errorf("error fetching node %s by id: %w", nodeName, err) } - if routeExists { + privNet, ok := findServerPrivateNetByID(server, r.network.ID) + if !ok { + r.serverCache.InvalidateCache() + server, err = r.serverCache.ByID(ctx, server.ID) + if err != nil { + return nil, nil, fmt.Errorf("error fetching node %s by id: %w", nodeName, err) + } + privNet, ok = findServerPrivateNetByID(server, r.network.ID) + if !ok { + return nil, nil, fmt.Errorf("server %s (%d): network with id %d not attached to this server", server.Name, server.ID, r.network.ID) + } + } + + return node, privNet.IP, nil +} + +// upsertRoute ensures the hcloud network has a route for cidr pointing at gateway. A matching +// route is a no-op; a stale route with a different gateway is replaced in place. nodeName is +// used only for logging and for surfacing API conflicts against the right k8s object. +func (r *routes) upsertRoute(ctx context.Context, gateway net.IP, cidr *net.IPNet, nodeName string) error { + if err := r.reloadNetwork(ctx); err != nil { + return fmt.Errorf("error reloading network: %w", err) + } + + destination := cidr.String() + existingIdx := slices.IndexFunc(r.network.Routes, func(nr hcloud.NetworkRoute) bool { + return nr.Destination.String() == destination + }) + if existingIdx >= 0 { + existing := r.network.Routes[existingIdx] + if existing.Gateway.Equal(gateway) { + klog.InfoS( + "route already exists: skipping creation", + "target-node", nodeName, + "destination-cidr", destination, + ) + return nil + } + + action, _, err := r.client.Network.DeleteRoute(ctx, r.network, hcloud.NetworkDeleteRouteOpts{ + Route: existing, + }) + if err != nil { + return fmt.Errorf("error deleting route for %q via %q: %w", cidr.String(), gateway.String(), err) + } + if err := r.client.Action.WaitFor(ctx, action); err != nil { + return fmt.Errorf("error deleting route for %q via %q: %w", cidr.String(), gateway.String(), err) + } klog.InfoS( - "route already exists: skipping creation", - "target-node", route.TargetNode, - "destination-cidr", route.DestinationCIDR, + "deleted stale route with wrong gateway; recreating", + "node", nodeName, + "gateway", gateway, + "cidr", destination, ) - return nil } opts := hcloud.NetworkAddRouteOpts{ Route: hcloud.NetworkRoute{ Destination: cidr, - Gateway: ip, + Gateway: gateway, }, } action, _, err := r.client.Network.AddRoute(ctx, r.network, opts) @@ -183,15 +233,15 @@ func (r *routes) CreateRoute(ctx context.Context, _ string, _ string, route *clo if hcloud.IsError(err, hcloud.ErrorCodeLocked, hcloud.ErrorCodeConflict) { return apierrors.NewConflict( corev1.Resource("nodes"), - string(route.TargetNode), + nodeName, err, ) } - return fmt.Errorf("%s: %w", op, err) + return fmt.Errorf("error adding route for %q via %q: %w", cidr.String(), gateway.String(), err) } if err := r.client.Action.WaitFor(ctx, action); err != nil { - return fmt.Errorf("%s: %w", op, err) + return fmt.Errorf("error adding route for %q via %q: %w", cidr.String(), gateway.String(), err) } return nil @@ -220,17 +270,6 @@ func (r *routes) DeleteRoute(ctx context.Context, _ string, route *cloudprovider return fmt.Errorf("%s: %w", op, err) } - err = r.deleteRouteFromHcloud(ctx, cidr, ip) - if err != nil { - return fmt.Errorf("%s: %w", op, err) - } - return nil -} - -func (r *routes) deleteRouteFromHcloud(ctx context.Context, cidr *net.IPNet, ip net.IP) error { - const op = "hcloud/deleteRouteFromHcloud" - metrics.OperationCalled.WithLabelValues(op).Inc() - opts := hcloud.NetworkDeleteRouteOpts{ Route: hcloud.NetworkRoute{ Destination: cidr, @@ -272,43 +311,19 @@ func (r *routes) hcloudRouteToRoute(ctx context.Context, route hcloud.NetworkRou return cpRoute, nil } -func (r *routes) checkIfRouteAlreadyExists(ctx context.Context, route *cloudprovider.Route) (bool, error) { - const op = "hcloud/checkIfRouteAlreadyExists" - metrics.OperationCalled.WithLabelValues(op).Inc() +func (r *routes) warnCIDRMismatch(cidr *net.IPNet, node *corev1.Node) { + clusterPrefixLen, _ := r.clusterCIDR.Mask.Size() + destPrefixLen, _ := cidr.Mask.Size() - if err := r.reloadNetwork(ctx); err != nil { - return false, fmt.Errorf("%s: %w", op, err) - } - - for _, _route := range r.network.Routes { - if _route.Destination.String() == route.DestinationCIDR { - srv, err := r.serverCache.ByName(ctx, string(route.TargetNode)) - if err != nil { - return false, fmt.Errorf("%s: %w", op, err) - } - privNet, ok := findServerPrivateNetByID(srv, r.network.ID) - if !ok { - return false, fmt.Errorf("%s: server %v: no network with id: %d", op, route.TargetNode, r.network.ID) - } - ip := privNet.IP - - if !_route.Gateway.Equal(ip) { - action, _, err := r.client.Network.DeleteRoute(ctx, r.network, hcloud.NetworkDeleteRouteOpts{ - Route: _route, - }) - if err != nil { - return false, fmt.Errorf("%s: %w", op, err) - } - - if err := r.client.Action.WaitFor(ctx, action); err != nil { - return false, fmt.Errorf("%s: %w", op, err) - } - } - - return true, nil - } + if !r.clusterCIDR.Contains(cidr.IP) || destPrefixLen < clusterPrefixLen { + warnMsg := fmt.Sprintf( + "route CIDR %s is not contained within cluster CIDR %s", + cidr.String(), + r.clusterCIDR.String(), + ) + klog.Warning(warnMsg) + r.recorder.Event(node, corev1.EventTypeWarning, "ClusterCIDRMisconfigured", warnMsg) } - return false, nil } func findServerPrivateNetByID(srv *hcloud.Server, id int64) (hcloud.ServerPrivateNet, bool) { diff --git a/hcloud/routes_test.go b/hcloud/routes_test.go index fdf8200a..439f2113 100644 --- a/hcloud/routes_test.go +++ b/hcloud/routes_test.go @@ -6,12 +6,28 @@ import ( "net/http" "testing" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + corelisters "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" cloudprovider "k8s.io/cloud-provider" "github.com/hetznercloud/hcloud-go/v2/hcloud" "github.com/hetznercloud/hcloud-go/v2/hcloud/schema" ) +func nodeLister(t *testing.T, nodes ...*corev1.Node) corelisters.NodeLister { + indexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{}) + for _, n := range nodes { + if err := indexer.Add(n); err != nil { + t.Fatalf("seed node lister: %v", err) + } + } + return corelisters.NewNodeLister(indexer) +} + const DefaultClusterCIDR = "10.244.0.0/16" func TestRoutes_CreateRoute(t *testing.T) { @@ -72,7 +88,11 @@ func TestRoutes_CreateRoute(t *testing.T) { }, }) }) - routes, err := newRoutes(env.Client, 1, DefaultClusterCIDR, env.Recorder) + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "node15"}, + Spec: corev1.NodeSpec{ProviderID: "hcloud://1"}, + } + routes, err := newRoutes(env.Client, 1, DefaultClusterCIDR, env.Recorder, nodeLister(t, node)) if err != nil { t.Fatalf("Unexpected error: %v", err) } @@ -81,6 +101,9 @@ func TestRoutes_CreateRoute(t *testing.T) { Name: "route", TargetNode: "node15", DestinationCIDR: "10.5.0.0/24", + TargetNodeAddresses: []corev1.NodeAddress{ + {Type: corev1.NodeInternalIP, Address: "10.0.0.2"}, + }, }) if err != nil { t.Fatalf("Unexpected error: %v", err) @@ -121,7 +144,7 @@ func TestRoutes_ListRoutes(t *testing.T) { }, }) }) - routes, err := newRoutes(env.Client, 1, DefaultClusterCIDR, env.Recorder) + routes, err := newRoutes(env.Client, 1, DefaultClusterCIDR, env.Recorder, nodeLister(t)) if err != nil { t.Fatalf("Unexpected error: %v", err) } @@ -189,7 +212,7 @@ func TestRoutes_DeleteRoute(t *testing.T) { }, }) }) - routes, err := newRoutes(env.Client, 1, DefaultClusterCIDR, env.Recorder) + routes, err := newRoutes(env.Client, 1, DefaultClusterCIDR, env.Recorder, nodeLister(t)) if err != nil { t.Fatalf("Unexpected error: %v", err) } @@ -203,3 +226,206 @@ func TestRoutes_DeleteRoute(t *testing.T) { t.Fatalf("Unexpected error: %v", err) } } + +func TestRoutes_CreateRoute_RobotProviderID(t *testing.T) { + env := newTestEnv() + defer env.Teardown() + + env.Mux.HandleFunc("/networks/1", func(w http.ResponseWriter, _ *http.Request) { + json.NewEncoder(w).Encode(schema.NetworkGetResponse{ + Network: schema.Network{ID: 1, Name: "network-1", IPRange: "10.0.0.0/8"}, + }) + }) + + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "robot-node"}, + Spec: corev1.NodeSpec{ProviderID: "hrobot://1"}, + } + + routes, err := newRoutes(env.Client, 1, DefaultClusterCIDR, env.Recorder, nodeLister(t, node)) + require.NoError(t, err) + + err = routes.CreateRoute(context.TODO(), "my-cluster", "route", &cloudprovider.Route{ + TargetNode: "robot-node", + DestinationCIDR: "10.5.0.0/24", + }) + require.Error(t, err) + assert.ErrorContains(t, err, "not a cloud server") +} + +// TestRoutes_CreateRoute_NodeNameDrift proves the routes controller still works when the +// k8s node name differs from the hcloud server name — the core fix in this PR. The server is +// resolved by its immutable ProviderID rather than by k8s node name. +func TestRoutes_CreateRoute_NodeNameDrift(t *testing.T) { + env := newTestEnv() + defer env.Teardown() + + env.Mux.HandleFunc("/servers", func(w http.ResponseWriter, _ *http.Request) { + json.NewEncoder(w).Encode(schema.ServerListResponse{ + Servers: []schema.Server{ + { + ID: 42, + Name: "original-hostname", // intentionally differs from the k8s node name + PrivateNet: []schema.ServerPrivateNet{ + {Network: 1, IP: "10.0.0.2"}, + }, + }, + }, + }) + }) + env.Mux.HandleFunc("/networks/1", func(w http.ResponseWriter, _ *http.Request) { + json.NewEncoder(w).Encode(schema.NetworkGetResponse{ + Network: schema.Network{ID: 1, Name: "network-1", IPRange: "10.0.0.0/8"}, + }) + }) + env.Mux.HandleFunc("/actions", func(w http.ResponseWriter, _ *http.Request) { + json.NewEncoder(w).Encode(schema.ActionListResponse{ + Actions: []schema.Action{{ID: 1, Status: string(hcloud.ActionStatusSuccess), Progress: 100}}, + }) + }) + env.Mux.HandleFunc("/networks/1/actions/add_route", func(w http.ResponseWriter, r *http.Request) { + var reqBody schema.NetworkActionAddRouteRequest + assert.NoError(t, json.NewDecoder(r.Body).Decode(&reqBody)) + assert.Equal(t, "10.0.0.2", reqBody.Gateway) + json.NewEncoder(w).Encode(schema.NetworkActionAddRouteResponse{ + Action: schema.Action{ID: 1, Status: string(hcloud.ActionStatusRunning)}, + }) + }) + + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "drifted-name"}, + Spec: corev1.NodeSpec{ProviderID: "hcloud://42"}, + } + + routes, err := newRoutes(env.Client, 1, DefaultClusterCIDR, env.Recorder, nodeLister(t, node)) + require.NoError(t, err) + + err = routes.CreateRoute(context.TODO(), "my-cluster", "route", &cloudprovider.Route{ + TargetNode: "drifted-name", + DestinationCIDR: "10.5.0.0/24", + TargetNodeAddresses: []corev1.NodeAddress{ + {Type: corev1.NodeInternalIP, Address: "10.0.0.2"}, + }, + }) + require.NoError(t, err) +} + +// TestRoutes_CreateRoute_ReplaceStaleRoute asserts that a pre-existing route with a wrong +// gateway is deleted and a new one is added in the same call (upsert-in-place). +func TestRoutes_CreateRoute_ReplaceStaleRoute(t *testing.T) { + env := newTestEnv() + defer env.Teardown() + + env.Mux.HandleFunc("/servers", func(w http.ResponseWriter, _ *http.Request) { + err := json.NewEncoder(w).Encode(schema.ServerListResponse{ + Servers: []schema.Server{ + {ID: 1, Name: "node15", PrivateNet: []schema.ServerPrivateNet{{Network: 1, IP: "10.0.0.2"}}}, + }, + }) + assert.NoError(t, err) + }) + env.Mux.HandleFunc("/networks/1", func(w http.ResponseWriter, _ *http.Request) { + err := json.NewEncoder(w).Encode(schema.NetworkGetResponse{ + Network: schema.Network{ + ID: 1, Name: "network-1", IPRange: "10.0.0.0/8", + Routes: []schema.NetworkRoute{ + {Destination: "10.5.0.0/24", Gateway: "10.99.99.99"}, + }, + }, + }) + assert.NoError(t, err) + }) + env.Mux.HandleFunc("/actions", func(w http.ResponseWriter, _ *http.Request) { + err := json.NewEncoder(w).Encode(schema.ActionListResponse{ + Actions: []schema.Action{{ID: 1, Status: string(hcloud.ActionStatusSuccess), Progress: 100}}, + }) + assert.NoError(t, err) + }) + env.Mux.HandleFunc("/networks/1/actions/delete_route", func(w http.ResponseWriter, r *http.Request) { + var reqBody schema.NetworkActionDeleteRouteRequest + assert.NoError(t, json.NewDecoder(r.Body).Decode(&reqBody)) + assert.Equal(t, "10.99.99.99", reqBody.Gateway) + err := json.NewEncoder(w).Encode(schema.NetworkActionDeleteRouteResponse{ + Action: schema.Action{ID: 1, Status: string(hcloud.ActionStatusRunning)}, + }) + assert.NoError(t, err) + }) + env.Mux.HandleFunc("/networks/1/actions/add_route", func(w http.ResponseWriter, r *http.Request) { + var reqBody schema.NetworkActionAddRouteRequest + assert.NoError(t, json.NewDecoder(r.Body).Decode(&reqBody)) + assert.Equal(t, "10.0.0.2", reqBody.Gateway) + err := json.NewEncoder(w).Encode(schema.NetworkActionAddRouteResponse{ + Action: schema.Action{ID: 1, Status: string(hcloud.ActionStatusRunning)}, + }) + assert.NoError(t, err) + }) + + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "node15"}, + Spec: corev1.NodeSpec{ProviderID: "hcloud://1"}, + } + + routes, err := newRoutes(env.Client, 1, DefaultClusterCIDR, env.Recorder, nodeLister(t, node)) + require.NoError(t, err) + + err = routes.CreateRoute(context.TODO(), "my-cluster", "route", &cloudprovider.Route{ + TargetNode: "node15", + DestinationCIDR: "10.5.0.0/24", + TargetNodeAddresses: []corev1.NodeAddress{ + {Type: corev1.NodeInternalIP, Address: "10.0.0.2"}, + }, + }) + require.NoError(t, err) +} + +// TestRoutes_CreateRoute_AlreadyExists asserts no API mutations happen when the route already +// matches the desired destination + gateway. +func TestRoutes_CreateRoute_AlreadyExists(t *testing.T) { + env := newTestEnv() + defer env.Teardown() + + env.Mux.HandleFunc("/servers", func(w http.ResponseWriter, _ *http.Request) { + json.NewEncoder(w).Encode(schema.ServerListResponse{ + Servers: []schema.Server{ + {ID: 1, Name: "node15", PrivateNet: []schema.ServerPrivateNet{{Network: 1, IP: "10.0.0.2"}}}, + }, + }) + }) + env.Mux.HandleFunc("/networks/1", func(w http.ResponseWriter, _ *http.Request) { + json.NewEncoder(w).Encode(schema.NetworkGetResponse{ + Network: schema.Network{ + ID: 1, Name: "network-1", IPRange: "10.0.0.0/8", + Routes: []schema.NetworkRoute{ + {Destination: "10.5.0.0/24", Gateway: "10.0.0.2"}, + }, + }, + }) + }) + + calledAdd, calledDelete := false, false + env.Mux.HandleFunc("/networks/1/actions/delete_route", func(_ http.ResponseWriter, _ *http.Request) { + calledDelete = true + }) + env.Mux.HandleFunc("/networks/1/actions/add_route", func(_ http.ResponseWriter, _ *http.Request) { + calledAdd = true + }) + + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "node15"}, + Spec: corev1.NodeSpec{ProviderID: "hcloud://1"}, + } + + routes, err := newRoutes(env.Client, 1, DefaultClusterCIDR, env.Recorder, nodeLister(t, node)) + require.NoError(t, err) + + err = routes.CreateRoute(context.TODO(), "my-cluster", "route", &cloudprovider.Route{ + TargetNode: "node15", + DestinationCIDR: "10.5.0.0/24", + TargetNodeAddresses: []corev1.NodeAddress{ + {Type: corev1.NodeInternalIP, Address: "10.0.0.2"}, + }, + }) + require.NoError(t, err) + assert.False(t, calledAdd, "route should already exist: /add_route must not be called") + assert.False(t, calledDelete, "route should already exist: /delete_route must not be called") +} diff --git a/internal/hcops/server.go b/internal/hcops/server.go index 7872ed73..13498d05 100644 --- a/internal/hcops/server.go +++ b/internal/hcops/server.go @@ -32,6 +32,7 @@ type AllServersCache struct { lastRefresh time.Time byPrivIP map[string]*hcloud.Server byName map[string]*hcloud.Server + byID map[int64]*hcloud.Server mu sync.Mutex } @@ -77,6 +78,26 @@ func (c *AllServersCache) ByName(ctx context.Context, name string) (*hcloud.Serv return srv, nil } +// ByID obtains a server from the cache using the servers id. +// +// Note that a pointer to the object stored in the cache is returned. Modifying +// this object affects the cache and all other code parts holding a reference. +// Furthermore, modifying the returned server is not concurrency safe. +func (c *AllServersCache) ByID(ctx context.Context, id int64) (*hcloud.Server, error) { + const op = "hcops/AllServersCache.ByID" + metrics.OperationCalled.WithLabelValues(op).Inc() + + srv, err := c.getFromCache(ctx, func() (*hcloud.Server, bool) { + srv, ok := c.byID[id] + return srv, ok + }) + if err != nil { + return nil, fmt.Errorf("%s: %d %w", op, id, err) + } + + return srv, nil +} + // getFromCache wraps the cache maps with expiry time and "get-on-unavailable" functionality. func (c *AllServersCache) getFromCache(ctx context.Context, retrieveFromCacheMaps func() (*hcloud.Server, bool)) (*hcloud.Server, error) { const op = "hcops/AllServersCache.getCache" @@ -136,6 +157,7 @@ func (c *AllServersCache) refreshCache(ctx context.Context) error { // Re-initialize all maps. This effectively clears the current cache. c.byPrivIP = make(map[string]*hcloud.Server) c.byName = make(map[string]*hcloud.Server) + c.byID = make(map[int64]*hcloud.Server) for _, server := range servers { // Index servers by the IPs of their private networks @@ -157,6 +179,9 @@ func (c *AllServersCache) refreshCache(ctx context.Context) error { // Index servers by their names. c.byName[server.Name] = server + + // Index servers by their ID. + c.byID[server.ID] = server } c.lastRefresh = time.Now() diff --git a/internal/hcops/server_test.go b/internal/hcops/server_test.go index 142ddceb..5d205a77 100644 --- a/internal/hcops/server_test.go +++ b/internal/hcops/server_test.go @@ -375,6 +375,12 @@ func newAllServersCacheOps(t *testing.T, srv *hcloud.Server) map[string]allServe } return c.ByName(t.Context(), srv.Name) }, + "ByID": func(c *hcops.AllServersCache) (*hcloud.Server, error) { + if srv.ID == 0 { + t.Fatal("ByID: server has no id") + } + return c.ByID(t.Context(), srv.ID) + }, } } diff --git a/main.go b/main.go index 2905310a..61dd5eb2 100644 --- a/main.go +++ b/main.go @@ -62,7 +62,9 @@ func main() { } func cloudInitializer(config *config.CompletedConfig) cloudprovider.Interface { - cloud, err := hcloud.NewCloud(config.ComponentConfig.KubeCloudShared.ClusterCIDR) + nodeLister := config.SharedInformers.Core().V1().Nodes().Lister() + + cloud, err := hcloud.NewCloud(config.ComponentConfig.KubeCloudShared.ClusterCIDR, nodeLister) if err != nil { klog.Fatalf("Cloud provider could not be initialized: %v", err) }