From c5d199579e4d9c0009b36456332889ec4ed9dc42 Mon Sep 17 00:00:00 2001 From: Yuvaraj Kakaraparthi Date: Mon, 3 Apr 2023 18:12:34 -0700 Subject: [PATCH] mitigate kubeadm join issue when classy cluster is upgrading --- .../topology/cluster/conditions.go | 5 + .../topology/cluster/conditions_test.go | 106 +++++------- .../topology/cluster/desired_state.go | 24 ++- .../topology/cluster/desired_state_test.go | 163 +++++------------- .../topology/cluster/scope/state.go | 55 +++++- .../topology/cluster/scope/state_test.go | 157 +++++++++++++++++ .../topology/cluster/scope/upgradetracker.go | 21 ++- internal/test/builder/builders.go | 15 +- .../test/builder/zz_generated.deepcopy.go | 5 + 9 files changed, 357 insertions(+), 194 deletions(-) create mode 100644 internal/controllers/topology/cluster/scope/state_test.go diff --git a/internal/controllers/topology/cluster/conditions.go b/internal/controllers/topology/cluster/conditions.go index c66e3cbc6579..d2c3df56ec93 100644 --- a/internal/controllers/topology/cluster/conditions.go +++ b/internal/controllers/topology/cluster/conditions.go @@ -133,6 +133,11 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste case s.UpgradeTracker.ControlPlane.IsScaling: msgBuilder.WriteString(" Control plane is reconciling desired replicas") + case len(s.UpgradeTracker.MachineDeployments.UpgradingNames()) > 0: + fmt.Fprintf(msgBuilder, " MachineDeployment(s) %s are upgrading", + computeMachineDeploymentNameList(s.UpgradeTracker.MachineDeployments.UpgradingNames()), + ) + case s.Current.MachineDeployments.IsAnyRollingOut(): msgBuilder.WriteString(fmt.Sprintf(" MachineDeployment(s) %s are rolling out", computeMachineDeploymentNameList(s.UpgradeTracker.MachineDeployments.RolloutNames()), diff --git a/internal/controllers/topology/cluster/conditions_test.go b/internal/controllers/topology/cluster/conditions_test.go index caf6e6378132..ffb5837e86f1 100644 --- a/internal/controllers/topology/cluster/conditions_test.go +++ b/internal/controllers/topology/cluster/conditions_test.go @@ -23,6 +23,9 @@ import ( "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1" @@ -32,11 +35,16 @@ import ( ) func TestReconcileTopologyReconciledCondition(t *testing.T) { + g := NewWithT(t) + scheme := runtime.NewScheme() + g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed()) + tests := []struct { name string reconcileErr error s *scope.Scope cluster *clusterv1.Cluster + machines []*clusterv1.Machine wantConditionStatus corev1.ConditionStatus wantConditionReason string wantConditionMessage string @@ -382,7 +390,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { wantConditionStatus: corev1.ConditionTrue, }, { - name: "should set the condition to false is some machine deployments have not picked the new version because other machine deployments are rolling out (not all replicas ready)", + name: "should set the condition to false is some machine deployments have not picked the new version because other machine deployments are upgrading", reconcileErr: nil, cluster: &clusterv1.Cluster{}, s: &scope.Scope{ @@ -404,6 +412,11 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { Object: builder.MachineDeployment("ns1", "md0-abc123"). WithReplicas(2). WithVersion("v1.22.0"). + WithSelector(metav1.LabelSelector{ + MatchLabels: map[string]string{ + clusterv1.ClusterTopologyMachineDeploymentNameLabel: "md0", + }, + }). WithStatus(clusterv1.MachineDeploymentStatus{ // MD is not ready because we don't have 2 updated, ready and available replicas. Replicas: int32(2), @@ -418,67 +431,11 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { Object: builder.MachineDeployment("ns1", "md1-abc123"). WithReplicas(2). WithVersion("v1.21.2"). - WithStatus(clusterv1.MachineDeploymentStatus{ - Replicas: int32(2), - UpdatedReplicas: int32(2), - ReadyReplicas: int32(2), - AvailableReplicas: int32(2), - UnavailableReplicas: int32(0), + WithSelector(metav1.LabelSelector{ + MatchLabels: map[string]string{ + clusterv1.ClusterTopologyMachineDeploymentNameLabel: "md1", + }, }). - Build(), - }, - }, - }, - UpgradeTracker: func() *scope.UpgradeTracker { - ut := scope.NewUpgradeTracker() - ut.ControlPlane.PendingUpgrade = false - ut.MachineDeployments.MarkRollingOut("md0-abc123") - ut.MachineDeployments.MarkPendingUpgrade("md1-abc123") - return ut - }(), - HookResponseTracker: scope.NewHookResponseTracker(), - }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason, - wantConditionMessage: "MachineDeployment(s) md1-abc123 upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are rolling out", - }, - { - name: "should set the condition to false if some machine deployments have not picked the new version because other machine deployments are rolling out (unavailable replica)", - reconcileErr: nil, - cluster: &clusterv1.Cluster{}, - s: &scope.Scope{ - Blueprint: &scope.ClusterBlueprint{ - Topology: &clusterv1.Topology{ - Version: "v1.22.0", - }, - }, - Current: &scope.ClusterState{ - Cluster: &clusterv1.Cluster{}, - ControlPlane: &scope.ControlPlaneState{ - Object: builder.ControlPlane("ns1", "controlplane1"). - WithVersion("v1.22.0"). - WithReplicas(3). - Build(), - }, - MachineDeployments: scope.MachineDeploymentsStateMap{ - "md0": &scope.MachineDeploymentState{ - Object: builder.MachineDeployment("ns1", "md0-abc123"). - WithReplicas(2). - WithVersion("v1.22.0"). - WithStatus(clusterv1.MachineDeploymentStatus{ - // MD is not ready because we still have an unavailable replica. - Replicas: int32(2), - UpdatedReplicas: int32(2), - ReadyReplicas: int32(2), - AvailableReplicas: int32(2), - UnavailableReplicas: int32(1), - }). - Build(), - }, - "md1": &scope.MachineDeploymentState{ - Object: builder.MachineDeployment("ns1", "md1-abc123"). - WithReplicas(2). - WithVersion("v1.21.2"). WithStatus(clusterv1.MachineDeploymentStatus{ Replicas: int32(2), UpdatedReplicas: int32(2), @@ -493,15 +450,25 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { UpgradeTracker: func() *scope.UpgradeTracker { ut := scope.NewUpgradeTracker() ut.ControlPlane.PendingUpgrade = false - ut.MachineDeployments.MarkRollingOut("md0-abc123") + ut.MachineDeployments.MarkUpgradingAndRollingOut("md0-abc123") ut.MachineDeployments.MarkPendingUpgrade("md1-abc123") return ut }(), HookResponseTracker: scope.NewHookResponseTracker(), }, + machines: []*clusterv1.Machine{ + builder.Machine("ns1", "md0-machine0"). + WithLabels(map[string]string{clusterv1.ClusterTopologyMachineDeploymentNameLabel: "md0"}). + WithVersion("v1.21.2"). // Machine's version does not match MachineDeployment's version + Build(), + builder.Machine("ns1", "md1-machine0"). + WithLabels(map[string]string{clusterv1.ClusterTopologyMachineDeploymentNameLabel: "md1"}). + WithVersion("v1.21.2"). + Build(), + }, wantConditionStatus: corev1.ConditionFalse, wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason, - wantConditionMessage: "MachineDeployment(s) md1-abc123 upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are rolling out", + wantConditionMessage: "MachineDeployment(s) md1-abc123 upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading", }, { name: "should set the condition to false if some machine deployments have not picked the new version because their upgrade has been deferred", @@ -624,7 +591,18 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) - r := &Reconciler{} + objs := []client.Object{} + if tt.s != nil && tt.s.Current != nil { + for _, mds := range tt.s.Current.MachineDeployments { + objs = append(objs, mds.Object) + } + } + for _, m := range tt.machines { + objs = append(objs, m) + } + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(objs...).Build() + + r := &Reconciler{Client: fakeClient} err := r.reconcileTopologyReconciledCondition(tt.s, tt.cluster, tt.reconcileErr) if tt.wantErr { g.Expect(err).To(HaveOccurred()) diff --git a/internal/controllers/topology/cluster/desired_state.go b/internal/controllers/topology/cluster/desired_state.go index aae24f8c42ce..31c2828dc273 100644 --- a/internal/controllers/topology/cluster/desired_state.go +++ b/internal/controllers/topology/cluster/desired_state.go @@ -88,7 +88,7 @@ func (r *Reconciler) computeDesiredState(ctx context.Context, s *scope.Scope) (* // If required, compute the desired state of the MachineDeployments from the list of MachineDeploymentTopologies // defined in the cluster. if s.Blueprint.HasMachineDeployments() { - desiredState.MachineDeployments, err = computeMachineDeployments(ctx, s, desiredState.ControlPlane) + desiredState.MachineDeployments, err = r.computeMachineDeployments(ctx, s, desiredState.ControlPlane) if err != nil { return nil, errors.Wrapf(err, "failed to compute MachineDeployments") } @@ -522,12 +522,22 @@ func calculateRefDesiredAPIVersion(currentRef *corev1.ObjectReference, desiredRe } // computeMachineDeployments computes the desired state of the list of MachineDeployments. -func computeMachineDeployments(ctx context.Context, s *scope.Scope, desiredControlPlaneState *scope.ControlPlaneState) (scope.MachineDeploymentsStateMap, error) { - // Mark all the machine deployments that are currently rolling out. - // This captured information will be used for - // - Building the TopologyReconciled condition. - // - Making upgrade decisions on machine deployments. +func (r *Reconciler) computeMachineDeployments(ctx context.Context, s *scope.Scope, desiredControlPlaneState *scope.ControlPlaneState) (scope.MachineDeploymentsStateMap, error) { + // Mark all the MachineDeployments that are currently upgrading. + // This captured information is used for: + // - Building the TopologyReconciled condition. + // - Making upgrade decisions on machine deployments. + upgradingMDs, err := s.Current.MachineDeployments.Upgrading(ctx, r.Client) + if err != nil { + return nil, err + } + s.UpgradeTracker.MachineDeployments.MarkUpgradingAndRollingOut(upgradingMDs...) + + // Mark all MachineDeployments that are currently rolling out. + // This captured information is used for: + // - Building the TopologyReconciled condition (when control plane upgrade is pending) s.UpgradeTracker.MachineDeployments.MarkRollingOut(s.Current.MachineDeployments.RollingOut()...) + machineDeploymentsStateMap := make(scope.MachineDeploymentsStateMap) for _, mdTopology := range s.Blueprint.Topology.Workers.MachineDeployments { desiredMachineDeployment, err := computeMachineDeployment(ctx, s, desiredControlPlaneState, mdTopology) @@ -843,7 +853,7 @@ func computeMachineDeploymentVersion(s *scope.Scope, machineDeploymentTopology c // Control plane and machine deployments are stable. // Ready to pick up the topology version. - s.UpgradeTracker.MachineDeployments.MarkRollingOut(currentMDState.Object.Name) + s.UpgradeTracker.MachineDeployments.MarkUpgradingAndRollingOut(currentMDState.Object.Name) return desiredVersion, nil } diff --git a/internal/controllers/topology/cluster/desired_state_test.go b/internal/controllers/topology/cluster/desired_state_test.go index 0e81f0ed25a6..e3302d0e6b63 100644 --- a/internal/controllers/topology/cluster/desired_state_test.go +++ b/internal/controllers/topology/cluster/desired_state_test.go @@ -1635,69 +1635,48 @@ func TestComputeMachineDeployment(t *testing.T) { }). Build() - machineDeploymentStable := builder.MachineDeployment("test-namespace", "md-stable"). - WithGeneration(1). - WithReplicas(2). - WithStatus(clusterv1.MachineDeploymentStatus{ - ObservedGeneration: 2, - Replicas: 2, - ReadyReplicas: 2, - UpdatedReplicas: 2, - AvailableReplicas: 2, - }). - Build() - - machineDeploymentRollingOut := builder.MachineDeployment("test-namespace", "md-rolling"). - WithGeneration(1). - WithReplicas(2). - WithStatus(clusterv1.MachineDeploymentStatus{ - ObservedGeneration: 2, - Replicas: 1, - ReadyReplicas: 1, - UpdatedReplicas: 1, - AvailableReplicas: 1, - }). - Build() - - machineDeploymentsStateRollingOut := scope.MachineDeploymentsStateMap{ - "class-1": &scope.MachineDeploymentState{Object: machineDeploymentStable}, - "class-2": &scope.MachineDeploymentState{Object: machineDeploymentRollingOut}, - } - // Note: in all the following tests we are setting it up so that the control plane is already // stable at the topology version. // A more extensive list of scenarios is tested in TestComputeMachineDeploymentVersion. tests := []struct { - name string - machineDeploymentsState scope.MachineDeploymentsStateMap - currentMDVersion *string - upgradeConcurrency string - topologyVersion string - expectedVersion string + name string + upgradingMachineDeployments []string + currentMDVersion *string + upgradeConcurrency string + topologyVersion string + expectedVersion string }{ { - name: "use cluster.spec.topology.version if creating a new machine deployment", - machineDeploymentsState: nil, - upgradeConcurrency: "1", - currentMDVersion: nil, - topologyVersion: "v1.2.3", - expectedVersion: "v1.2.3", + name: "use cluster.spec.topology.version if creating a new machine deployment", + upgradingMachineDeployments: []string{}, + upgradeConcurrency: "1", + currentMDVersion: nil, + topologyVersion: "v1.2.3", + expectedVersion: "v1.2.3", + }, + { + name: "use cluster.spec.topology.version if creating a new machine deployment while another machine deployment is upgrading", + upgradingMachineDeployments: []string{"upgrading-md1"}, + upgradeConcurrency: "1", + currentMDVersion: nil, + topologyVersion: "v1.2.3", + expectedVersion: "v1.2.3", }, { - name: "use machine deployment's spec.template.spec.version if one of the machine deployments is rolling out, concurrency limit reached", - machineDeploymentsState: machineDeploymentsStateRollingOut, - upgradeConcurrency: "1", - currentMDVersion: pointer.String("v1.2.2"), - topologyVersion: "v1.2.3", - expectedVersion: "v1.2.2", + name: "use machine deployment's spec.template.spec.version if one of the machine deployments is upgrading, concurrency limit reached", + upgradingMachineDeployments: []string{"upgrading-md1"}, + upgradeConcurrency: "1", + currentMDVersion: pointer.String("v1.2.2"), + topologyVersion: "v1.2.3", + expectedVersion: "v1.2.2", }, { - name: "use cluster.spec.topology.version if one of the machine deployments is rolling out, concurrency limit not reached", - machineDeploymentsState: machineDeploymentsStateRollingOut, - upgradeConcurrency: "2", - currentMDVersion: pointer.String("v1.2.2"), - topologyVersion: "v1.2.3", - expectedVersion: "v1.2.3", + name: "use cluster.spec.topology.version if one of the machine deployments is upgrading, concurrency limit not reached", + upgradingMachineDeployments: []string{"upgrading-md1"}, + upgradeConcurrency: "2", + currentMDVersion: pointer.String("v1.2.2"), + topologyVersion: "v1.2.3", + expectedVersion: "v1.2.3", }, } for _, tt := range tests { @@ -1718,7 +1697,7 @@ func TestComputeMachineDeployment(t *testing.T) { } s.Blueprint.Topology.Workers = &clusterv1.WorkersTopology{} - mdsState := tt.machineDeploymentsState + mdsState := scope.MachineDeploymentsStateMap{} if tt.currentMDVersion != nil { // testing a case with an existing machine deployment // add the stable machine deployment to the current machine deployments state @@ -1753,7 +1732,7 @@ func TestComputeMachineDeployment(t *testing.T) { Name: "big-pool-of-machines", Replicas: pointer.Int32(2), } - + s.UpgradeTracker.MachineDeployments.MarkUpgradingAndRollingOut(tt.upgradingMachineDeployments...) obj, err := computeMachineDeployment(ctx, s, desiredControlPlaneState, mdTopology) g.Expect(err).NotTo(HaveOccurred()) g.Expect(*obj.Object.Spec.Template.Spec.Version).To(Equal(tt.expectedVersion)) @@ -1841,63 +1820,11 @@ func TestComputeMachineDeploymentVersion(t *testing.T) { }). Build() - // A machine deployment is considered stable if all the following are true: - // - md.spec.replicas == md.status.replicas - // - md.spec.replicas == md.status.updatedReplicas - // - md.spec.replicas == md.status.readyReplicas - // - md.status.unavailableReplicas == 0 - // - md.Generation < md.status.observedGeneration - // - // A machine deployment is considered upgrading if any of the above conditions - // is false. - stableMachineDeployment := func(ns, name string) *clusterv1.MachineDeployment { - return builder.MachineDeployment(ns, name). - WithGeneration(1). - WithReplicas(2). - WithStatus(clusterv1.MachineDeploymentStatus{ - ObservedGeneration: 2, - Replicas: 2, - UpdatedReplicas: 2, - AvailableReplicas: 2, - ReadyReplicas: 2, - UnavailableReplicas: 0, - }). - Build() - } - - rollingMachineDeployment := func(ns, name string) *clusterv1.MachineDeployment { - return builder.MachineDeployment(ns, name). - WithGeneration(1). - WithReplicas(2). - WithStatus(clusterv1.MachineDeploymentStatus{ - ObservedGeneration: 2, - Replicas: 1, - UpdatedReplicas: 1, - AvailableReplicas: 1, - ReadyReplicas: 1, - UnavailableReplicas: 1, - }). - Build() - } - - twoMachineDeploymentsStateStable := scope.MachineDeploymentsStateMap{ - "md1": &scope.MachineDeploymentState{Object: stableMachineDeployment("test1", "md1")}, - "md2": &scope.MachineDeploymentState{Object: stableMachineDeployment("test1", "md2")}, - } - oneStableOneRollingMachineDeploymentState := scope.MachineDeploymentsStateMap{ - "md1": &scope.MachineDeploymentState{Object: stableMachineDeployment("test1", "md1")}, - "md2": &scope.MachineDeploymentState{Object: rollingMachineDeployment("test1", "md2")}, - } - twoRollingMachineDeploymentState := scope.MachineDeploymentsStateMap{ - "md1": &scope.MachineDeploymentState{Object: rollingMachineDeployment("test1", "md1")}, - "md2": &scope.MachineDeploymentState{Object: rollingMachineDeployment("test1", "md2")}, - } - tests := []struct { name string machineDeploymentTopology clusterv1.MachineDeploymentTopology currentMachineDeploymentState *scope.MachineDeploymentState - machineDeploymentsStateMap scope.MachineDeploymentsStateMap + upgradingMachineDeployments []string upgradeConcurrency int currentControlPlane *unstructured.Unstructured desiredControlPlane *unstructured.Unstructured @@ -1907,7 +1834,6 @@ func TestComputeMachineDeploymentVersion(t *testing.T) { { name: "should return cluster.spec.topology.version if creating a new machine deployment", currentMachineDeploymentState: nil, - machineDeploymentsStateMap: make(scope.MachineDeploymentsStateMap), topologyVersion: "v1.2.3", expectedVersion: "v1.2.3", }, @@ -1921,7 +1847,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) { }, }, currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()}, - machineDeploymentsStateMap: twoMachineDeploymentsStateStable, + upgradingMachineDeployments: []string{}, currentControlPlane: controlPlaneStable123, desiredControlPlane: controlPlaneDesired, topologyVersion: "v1.2.3", @@ -1931,7 +1857,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) { // Control plane is considered upgrading if the control plane's spec.version and status.version is not equal. name: "should return machine deployment's spec.template.spec.version if control plane is upgrading", currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()}, - machineDeploymentsStateMap: twoMachineDeploymentsStateStable, + upgradingMachineDeployments: []string{}, currentControlPlane: controlPlaneUpgrading, topologyVersion: "v1.2.3", expectedVersion: "v1.2.2", @@ -1940,7 +1866,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) { // Control plane is considered ready to upgrade if spec.version of current and desired control planes are not equal. name: "should return machine deployment's spec.template.spec.version if control plane is ready to upgrade", currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()}, - machineDeploymentsStateMap: twoMachineDeploymentsStateStable, + upgradingMachineDeployments: []string{}, currentControlPlane: controlPlaneStable122, desiredControlPlane: controlPlaneDesired, topologyVersion: "v1.2.3", @@ -1950,7 +1876,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) { // Control plane is considered scaling if its spec.replicas is not equal to any of status.replicas, status.readyReplicas or status.updatedReplicas. name: "should return machine deployment's spec.template.spec.version if control plane is scaling", currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()}, - machineDeploymentsStateMap: twoMachineDeploymentsStateStable, + upgradingMachineDeployments: []string{}, currentControlPlane: controlPlaneScaling, topologyVersion: "v1.2.3", expectedVersion: "v1.2.2", @@ -1958,7 +1884,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) { { name: "should return cluster.spec.topology.version if the control plane is not upgrading, not scaling, not ready to upgrade and none of the machine deployments are rolling out", currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()}, - machineDeploymentsStateMap: twoMachineDeploymentsStateStable, + upgradingMachineDeployments: []string{}, currentControlPlane: controlPlaneStable123, desiredControlPlane: controlPlaneDesired, topologyVersion: "v1.2.3", @@ -1967,7 +1893,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) { { name: "should return cluster.spec.topology.version if control plane is stable, other machine deployments are rolling out, concurrency limit not reached", currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()}, - machineDeploymentsStateMap: oneStableOneRollingMachineDeploymentState, + upgradingMachineDeployments: []string{"upgrading-md1"}, upgradeConcurrency: 2, currentControlPlane: controlPlaneStable123, desiredControlPlane: controlPlaneDesired, @@ -1977,7 +1903,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) { { name: "should return machine deployment's spec.template.spec.version if control plane is stable, other machine deployments are rolling out, concurrency limit reached", currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()}, - machineDeploymentsStateMap: twoRollingMachineDeploymentState, + upgradingMachineDeployments: []string{"upgrading-md1", "upgrading-md2"}, upgradeConcurrency: 2, currentControlPlane: controlPlaneStable123, desiredControlPlane: controlPlaneDesired, @@ -1999,13 +1925,12 @@ func TestComputeMachineDeploymentVersion(t *testing.T) { Workers: &clusterv1.WorkersTopology{}, }}, Current: &scope.ClusterState{ - ControlPlane: &scope.ControlPlaneState{Object: tt.currentControlPlane}, - MachineDeployments: tt.machineDeploymentsStateMap, + ControlPlane: &scope.ControlPlaneState{Object: tt.currentControlPlane}, }, UpgradeTracker: scope.NewUpgradeTracker(scope.MaxMDUpgradeConcurrency(tt.upgradeConcurrency)), } desiredControlPlaneState := &scope.ControlPlaneState{Object: tt.desiredControlPlane} - s.UpgradeTracker.MachineDeployments.MarkRollingOut(s.Current.MachineDeployments.RollingOut()...) + s.UpgradeTracker.MachineDeployments.MarkUpgradingAndRollingOut(tt.upgradingMachineDeployments...) version, err := computeMachineDeploymentVersion(s, tt.machineDeploymentTopology, desiredControlPlaneState, tt.currentMachineDeploymentState) g.Expect(err).NotTo(HaveOccurred()) g.Expect(version).To(Equal(tt.expectedVersion)) diff --git a/internal/controllers/topology/cluster/scope/state.go b/internal/controllers/topology/cluster/scope/state.go index 220df1ba6608..3fb87de02d7b 100644 --- a/internal/controllers/topology/cluster/scope/state.go +++ b/internal/controllers/topology/cluster/scope/state.go @@ -17,7 +17,13 @@ limitations under the License. package scope import ( + "context" + "fmt" + + "github.com/pkg/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "sigs.k8s.io/controller-runtime/pkg/client" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil" @@ -74,6 +80,22 @@ func (mds MachineDeploymentsStateMap) IsAnyRollingOut() bool { return len(mds.RollingOut()) != 0 } +// Upgrading returns the list of the machine deployments +// that are upgrading. +func (mds MachineDeploymentsStateMap) Upgrading(ctx context.Context, c client.Client) ([]string, error) { + names := []string{} + for _, md := range mds { + upgrading, err := md.IsUpgrading(ctx, c) + if err != nil { + return nil, errors.Wrap(err, "failed to list upgrading MachineDeployments") + } + if upgrading { + names = append(names, md.Object.Name) + } + } + return names, nil +} + // MachineDeploymentState holds all the objects representing the state of a managed deployment. type MachineDeploymentState struct { // Object holds the MachineDeployment object. @@ -90,7 +112,7 @@ type MachineDeploymentState struct { MachineHealthCheck *clusterv1.MachineHealthCheck } -// IsRollingOut determines if the machine deployment is upgrading. +// IsRollingOut determines if the machine deployment is rolling out. // A machine deployment is considered upgrading if: // - if any of the replicas of the machine deployment is not ready. func (md *MachineDeploymentState) IsRollingOut() bool { @@ -98,3 +120,34 @@ func (md *MachineDeploymentState) IsRollingOut() bool { *md.Object.Spec.Replicas != md.Object.Status.ReadyReplicas || md.Object.Status.UnavailableReplicas > 0 } + +// IsUpgrading determines if the MachineDeployment is upgrading. +// A machine deployment is considered upgrading if at least one of the Machines of this +// MachineDeployment has a different version. +func (md *MachineDeploymentState) IsUpgrading(ctx context.Context, c client.Client) (bool, error) { + // If the MachineDeployment has no version there is no definitive way to check if it is upgrading. Therefore, return false. + // Note: This case should not happen. + if md.Object.Spec.Template.Spec.Version == nil { + return false, nil + } + selectorMap, err := metav1.LabelSelectorAsMap(&md.Object.Spec.Selector) + if err != nil { + return false, errors.Wrapf(err, "failed to check if MachineDeployment %s is upgrading: failed to convert label selector to map", md.Object.Name) + } + machines := &clusterv1.MachineList{} + if err := c.List(ctx, machines, client.InNamespace(md.Object.Namespace), client.MatchingLabels(selectorMap)); err != nil { + return false, errors.Wrapf(err, "failed to check if MachineDeployment %s is upgrading: failed to list Machines", md.Object.Name) + } + mdVersion := *md.Object.Spec.Template.Spec.Version + // Check if the versions of the all the Machines match the MachineDeployment version. + for i := range machines.Items { + machine := machines.Items[i] + if machine.Spec.Version == nil { + return false, fmt.Errorf("failed to check if MachineDeployment %s is upgrading: Machine %s has no version", md.Object.Name, machine.Name) + } + if *machine.Spec.Version != mdVersion { + return true, nil + } + } + return false, nil +} diff --git a/internal/controllers/topology/cluster/scope/state_test.go b/internal/controllers/topology/cluster/scope/state_test.go new file mode 100644 index 000000000000..f0683cad2952 --- /dev/null +++ b/internal/controllers/topology/cluster/scope/state_test.go @@ -0,0 +1,157 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scope + +import ( + "context" + "testing" + + . "github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/internal/test/builder" +) + +func TestIsUpgrading(t *testing.T) { + g := NewWithT(t) + scheme := runtime.NewScheme() + g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed()) + + tests := []struct { + name string + md *clusterv1.MachineDeployment + machines []*clusterv1.Machine + want bool + wantErr bool + }{ + { + name: "should return false if all the machines of MachineDeployment have the same version as the MachineDeployment", + md: builder.MachineDeployment("ns", "md1"). + WithClusterName("cluster1"). + WithVersion("v1.2.3"). + Build(), + machines: []*clusterv1.Machine{ + builder.Machine("ns", "machine1"). + WithClusterName("cluster1"). + WithVersion("v1.2.3"). + Build(), + builder.Machine("ns", "machine2"). + WithClusterName("cluster1"). + WithVersion("v1.2.3"). + Build(), + }, + want: false, + wantErr: false, + }, + { + name: "should return true if at least one of the machines of MachineDeployment has a different version", + md: builder.MachineDeployment("ns", "md1"). + WithClusterName("cluster1"). + WithVersion("v1.2.3"). + Build(), + machines: []*clusterv1.Machine{ + builder.Machine("ns", "machine1"). + WithClusterName("cluster1"). + WithVersion("v1.2.3"). + Build(), + builder.Machine("ns", "machine2"). + WithClusterName("cluster1"). + WithVersion("v1.2.2"). + Build(), + }, + want: true, + wantErr: false, + }, + { + name: "should return false if the MachineDeployment has no machines (creation phase)", + md: builder.MachineDeployment("ns", "md1"). + WithClusterName("cluster1"). + WithVersion("v1.2.3"). + Build(), + machines: []*clusterv1.Machine{}, + want: false, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + objs := []client.Object{} + objs = append(objs, tt.md) + for _, m := range tt.machines { + objs = append(objs, m) + } + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(objs...).Build() + mdState := &MachineDeploymentState{ + Object: tt.md, + } + got, err := mdState.IsUpgrading(ctx, fakeClient) + if tt.wantErr { + g.Expect(err).NotTo(BeNil()) + } else { + g.Expect(err).To(BeNil()) + g.Expect(got).To(Equal(tt.want)) + } + }) + } +} + +func TestUpgrading(t *testing.T) { + g := NewWithT(t) + scheme := runtime.NewScheme() + g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed()) + + ctx := context.Background() + + t.Run("should return the names of the upgrading MachineDeployments", func(t *testing.T) { + stableMD := builder.MachineDeployment("ns", "stableMD"). + WithClusterName("cluster1"). + WithVersion("v1.2.3"). + Build() + stableMDMachine := builder.Machine("ns", "stableMD-machine1"). + WithClusterName("cluster1"). + WithVersion("v1.2.3"). + Build() + + upgradingMD := builder.MachineDeployment("ns", "upgradingMD"). + WithClusterName("cluster2"). + WithVersion("v1.2.3"). + Build() + upgradingMDMachine := builder.Machine("ns", "upgradingMD-machine1"). + WithClusterName("cluster2"). + WithVersion("v1.2.2"). + Build() + + objs := []client.Object{stableMD, stableMDMachine, upgradingMD, upgradingMDMachine} + fakeClient := fake.NewClientBuilder().WithObjects(objs...).WithScheme(scheme).Build() + + mdsStateMap := MachineDeploymentsStateMap{ + "stableMD": {Object: stableMD}, + "upgradingMD": {Object: upgradingMD}, + } + want := []string{"upgradingMD"} + + got, err := mdsStateMap.Upgrading(ctx, fakeClient) + g.Expect(err).To(BeNil()) + g.Expect(got).To(BeComparableTo(want)) + }) +} diff --git a/internal/controllers/topology/cluster/scope/upgradetracker.go b/internal/controllers/topology/cluster/scope/upgradetracker.go index f7fad8db0e30..7bd66d658397 100644 --- a/internal/controllers/topology/cluster/scope/upgradetracker.go +++ b/internal/controllers/topology/cluster/scope/upgradetracker.go @@ -46,6 +46,7 @@ type ControlPlaneUpgradeTracker struct { type MachineDeploymentUpgradeTracker struct { pendingNames sets.Set[string] deferredNames sets.Set[string] + upgradingNames sets.Set[string] rollingOutNames sets.Set[string] holdUpgrades bool maxMachineDeploymentUpgradeConcurrency int @@ -85,6 +86,7 @@ func NewUpgradeTracker(opts ...UpgradeTrackerOption) *UpgradeTracker { pendingNames: sets.Set[string]{}, deferredNames: sets.Set[string]{}, rollingOutNames: sets.Set[string]{}, + upgradingNames: sets.Set[string]{}, maxMachineDeploymentUpgradeConcurrency: options.maxMDUpgradeConcurrency, }, } @@ -101,12 +103,29 @@ func (m *MachineDeploymentUpgradeTracker) MarkRollingOut(names ...string) { } } +// MarkUpgradingAndRollingOut marks a MachineDeployment as currently upgrading or about to upgrade. +// NOTE: Marking a MachineDeployment as upgrading also marks it as RollingOut. +func (m *MachineDeploymentUpgradeTracker) MarkUpgradingAndRollingOut(names ...string) { + for _, name := range names { + m.upgradingNames.Insert(name) + m.rollingOutNames.Insert(name) + } +} + // RolloutNames returns the list of machine deployments that are rolling out or // are about to rollout. +// Note: This also includes the names of MachineDeployments that are upgrading +// MachineDeployments are also considered rolling out. func (m *MachineDeploymentUpgradeTracker) RolloutNames() []string { return sets.List(m.rollingOutNames) } +// UpgradingNames returns the list of machine deployments that are upgrading or +// are about to upgrade. +func (m *MachineDeploymentUpgradeTracker) UpgradingNames() []string { + return sets.List(m.upgradingNames) +} + // HoldUpgrades is used to set if any subsequent upgrade operations should be paused, // e.g. because a AfterControlPlaneUpgrade hook response asked to do so. // If HoldUpgrades is called with `true` then AllowUpgrade would return false. @@ -123,7 +142,7 @@ func (m *MachineDeploymentUpgradeTracker) AllowUpgrade() bool { if m.holdUpgrades { return false } - return m.rollingOutNames.Len() < m.maxMachineDeploymentUpgradeConcurrency + return m.upgradingNames.Len() < m.maxMachineDeploymentUpgradeConcurrency } // MarkPendingUpgrade marks a machine deployment as in need of an upgrade. diff --git a/internal/test/builder/builders.go b/internal/test/builder/builders.go index b9a7cb1892fd..49f1b3ea363c 100644 --- a/internal/test/builder/builders.go +++ b/internal/test/builder/builders.go @@ -1147,6 +1147,7 @@ type MachineDeploymentBuilder struct { clusterName string bootstrapTemplate *unstructured.Unstructured infrastructureTemplate *unstructured.Unstructured + selector *metav1.LabelSelector version *string replicas *int32 defaulter bool @@ -1175,6 +1176,12 @@ func (m *MachineDeploymentBuilder) WithInfrastructureTemplate(ref *unstructured. return m } +// WithSelector adds the passed selector to the MachineDeployment as the selector. +func (m *MachineDeploymentBuilder) WithSelector(selector metav1.LabelSelector) *MachineDeploymentBuilder { + m.selector = &selector + return m +} + // WithClusterName adds the clusterName to the MachineDeploymentBuilder. func (m *MachineDeploymentBuilder) WithClusterName(name string) *MachineDeploymentBuilder { m.clusterName = name @@ -1243,15 +1250,19 @@ func (m *MachineDeploymentBuilder) Build() *clusterv1.MachineDeployment { if m.infrastructureTemplate != nil { obj.Spec.Template.Spec.InfrastructureRef = *objToRef(m.infrastructureTemplate) } + if m.selector != nil { + obj.Spec.Selector = *m.selector + } if m.status != nil { obj.Status = *m.status } if m.clusterName != "" { obj.Spec.Template.Spec.ClusterName = m.clusterName obj.Spec.ClusterName = m.clusterName - obj.Spec.Selector.MatchLabels = map[string]string{ - clusterv1.ClusterNameLabel: m.clusterName, + if obj.Spec.Selector.MatchLabels == nil { + obj.Spec.Selector.MatchLabels = map[string]string{} } + obj.Spec.Selector.MatchLabels[clusterv1.ClusterNameLabel] = m.clusterName obj.Spec.Template.Labels = map[string]string{ clusterv1.ClusterNameLabel: m.clusterName, } diff --git a/internal/test/builder/zz_generated.deepcopy.go b/internal/test/builder/zz_generated.deepcopy.go index 9cdb13970942..c1efb456be8a 100644 --- a/internal/test/builder/zz_generated.deepcopy.go +++ b/internal/test/builder/zz_generated.deepcopy.go @@ -343,6 +343,11 @@ func (in *MachineDeploymentBuilder) DeepCopyInto(out *MachineDeploymentBuilder) in, out := &in.infrastructureTemplate, &out.infrastructureTemplate *out = (*in).DeepCopy() } + if in.selector != nil { + in, out := &in.selector, &out.selector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } if in.version != nil { in, out := &in.version, &out.version *out = new(string)