Skip to content

Commit

Permalink
Add Namespace Scoped Zone Discovery
Browse files Browse the repository at this point in the history
- Introduce a feature flag to enable Namespace Scoped Zone.
- Enhance zone discovery to support Namespace Scoped Zones.
- Filter out zones marked for deletion during the discovery process.

Signed-off-by: Gong Zhang <[email protected]>
  • Loading branch information
zhanggbj committed Aug 8, 2024
1 parent 98f6510 commit e19a5cd
Show file tree
Hide file tree
Showing 14 changed files with 866 additions and 567 deletions.
2 changes: 1 addition & 1 deletion config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ spec:
- "--diagnostics-address=${CAPI_DIAGNOSTICS_ADDRESS:=:8443}"
- "--insecure-diagnostics=${CAPI_INSECURE_DIAGNOSTICS:=false}"
- --v=4
- "--feature-gates=NodeAntiAffinity=${EXP_NODE_ANTI_AFFINITY:=false}"
- "--feature-gates=NodeAntiAffinity=${EXP_NODE_ANTI_AFFINITY:=false},NamespaceScopedZone=${EXP_NAMESPACE_SCOPED_ZONE:=false}"
image: controller:latest
imagePullPolicy: IfNotPresent
name: manager
Expand Down
58 changes: 47 additions & 11 deletions controllers/vmware/vspherecluster_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ import (
"fmt"

"github.com/pkg/errors"
topologyv1 "github.com/vmware-tanzu/vm-operator/external/tanzu-topology/api/v1alpha1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
kerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
"sigs.k8s.io/cluster-api-provider-vsphere/feature"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
clusterutilv1 "sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
Expand All @@ -40,6 +40,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/reconcile"

vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
topologyv1 "sigs.k8s.io/cluster-api-provider-vsphere/external/tanzu-topology/api/v1alpha1"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/services"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/util"
Expand Down Expand Up @@ -160,7 +161,7 @@ func (r *ClusterReconciler) reconcileDelete(clusterCtx *vmware.ClusterContext) {

func (r *ClusterReconciler) reconcileNormal(ctx context.Context, clusterCtx *vmware.ClusterContext) error {
// Get any failure domains to report back to the CAPI core controller.
failureDomains, err := r.getFailureDomains(ctx)
failureDomains, err := r.getFailureDomains(ctx, clusterCtx)
if err != nil {
return errors.Wrapf(
err,
Expand Down Expand Up @@ -371,22 +372,57 @@ func (r *ClusterReconciler) VSphereMachineToCluster(ctx context.Context, o clien

// getFailureDomains returns the failure domain information discovered on the
// cluster hosting this controller.
//
// With the NamespaceScopedZone feature gate enabled, the names come from the
// Zone objects in the VSphereCluster's namespace, ignoring zones that carry a
// deletion timestamp; otherwise they come from the AvailabilityZone objects.
// A List error is returned as-is, and (nil, nil) is returned when no usable
// zone is found. Every discovered name is reported with ControlPlane set to
// true.
func (r *ClusterReconciler) getFailureDomains(ctx context.Context) (clusterv1.FailureDomains, error) {
availabilityZoneList := &topologyv1.AvailabilityZoneList{}
if err := r.Client.List(ctx, availabilityZoneList); err != nil {
return nil, err
}
func (r *ClusterReconciler) getFailureDomains(ctx context.Context, clusterCtx *vmware.ClusterContext) (clusterv1.FailureDomains, error) {
// Determine the source of failure domains based on the NamespaceScopedZone
// feature gate. If NamespaceScopedZone is enabled, use Zone, which is
// Namespace scoped; otherwise use AvailabilityZone, which is Cluster scoped.
var failureDomainNames []string
if feature.Gates.Enabled(feature.NamespaceScopedZone) {
zoneList := &topologyv1.ZoneList{}
listOptions := &client.ListOptions{Namespace: clusterCtx.VSphereCluster.Namespace}
if err := r.Client.List(ctx, zoneList, listOptions); err != nil {
return nil, err
}

// Zones marked for deletion must not be reported as failure domains.
filteredZonelist := filterZonesWithoutDeletionTimestamp(zoneList)
if len(filteredZonelist.Items) == 0 {
return nil, nil
}

for _, zone := range filteredZonelist.Items {
failureDomainNames = append(failureDomainNames, zone.Name)
}
} else {
availabilityZoneList := &topologyv1.AvailabilityZoneList{}
if err := r.Client.List(ctx, availabilityZoneList); err != nil {
return nil, err
}

if len(availabilityZoneList.Items) == 0 {
return nil, nil
if len(availabilityZoneList.Items) == 0 {
return nil, nil
}
for _, az := range availabilityZoneList.Items {
failureDomainNames = append(failureDomainNames, az.Name)
}
}

failureDomains := clusterv1.FailureDomains{}
for _, az := range availabilityZoneList.Items {
failureDomains[az.Name] = clusterv1.FailureDomainSpec{
for _, name := range failureDomainNames {
failureDomains[name] = clusterv1.FailureDomainSpec{
ControlPlane: true,
}
}

return failureDomains, nil
}

// filterZonesWithoutDeletionTimestamp builds a new ZoneList containing only the
// zones of zoneList whose deletionTimestamp is zero, i.e. zones that are not
// marked for deletion. Only Items is populated in the returned list; the input
// list is left untouched.
func filterZonesWithoutDeletionTimestamp(zoneList *topologyv1.ZoneList) *topologyv1.ZoneList {
	result := &topologyv1.ZoneList{}
	for i := range zoneList.Items {
		if !zoneList.Items[i].DeletionTimestamp.IsZero() {
			// The zone is being deleted; leave it out.
			continue
		}
		result.Items = append(result.Items, zoneList.Items[i])
	}
	return result
}
155 changes: 132 additions & 23 deletions controllers/vmware/vspherecluster_reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,24 @@ import (
. "github.com/onsi/ginkgo/v2"
"github.com/onsi/ginkgo/v2/types"
. "github.com/onsi/gomega"
topologyv1 "github.com/vmware-tanzu/vm-operator/external/tanzu-topology/api/v1alpha1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
apirecord "k8s.io/client-go/tools/record"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/util/conditions"
ctrl "sigs.k8s.io/controller-runtime"
utilfeature "k8s.io/component-base/featuregate/testing"

vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
topologyv1 "sigs.k8s.io/cluster-api-provider-vsphere/external/tanzu-topology/api/v1alpha1"
"sigs.k8s.io/cluster-api-provider-vsphere/feature"
capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/services/network"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/services/vmoperator"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/util"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/util/conditions"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)

func TestVSphereClusterReconciler(t *testing.T) {
Expand Down Expand Up @@ -130,32 +134,137 @@ var _ = Describe("Cluster Controller Tests", func() {
})

Context("Test getFailureDomains", func() {
It("should not find FailureDomains", func() {
fds, err := reconciler.getFailureDomains(ctx)
It("should not find any FailureDomains if neither AvailabilityZone nor Zone exists", func() {
fds, err := reconciler.getFailureDomains(ctx, clusterCtx)
Expect(err).ToNot(HaveOccurred())
Expect(fds).Should(BeEmpty())
})

It("should find FailureDomains", func() {
zoneNames := []string{"homer", "marge", "bart"}
for _, name := range zoneNames {
zone := &topologyv1.AvailabilityZone{
TypeMeta: metav1.TypeMeta{
APIVersion: topologyv1.GroupVersion.String(),
Kind: "AvailabilityZone",
},
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
Context("when only AvailabilityZone exists", func() {
BeforeEach(func() {
azNames := []string{"az-1", "az-2", "az-3"}
for _, name := range azNames {
az := &topologyv1.AvailabilityZone{
TypeMeta: metav1.TypeMeta{
APIVersion: topologyv1.GroupVersion.String(),
Kind: "AvailabilityZone",
},
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
}

Expect(controllerManagerContext.Client.Create(ctx, az)).To(Succeed())
}
})

Expect(controllerManagerContext.Client.Create(ctx, zone)).To(Succeed())
}
It("should discover FailureDomains using AvailabilityZone by default", func() {
fds, err := reconciler.getFailureDomains(ctx, clusterCtx)
Expect(err).ToNot(HaveOccurred())
Expect(fds).NotTo(BeNil())
Expect(fds).Should(HaveLen(3))
})

It("should return nil when NamespaceScopedZone is enabled", func() {
defer utilfeature.SetFeatureGateDuringTest(GinkgoTB(), feature.Gates, feature.NamespaceScopedZone, true)()
fds, err := reconciler.getFailureDomains(ctx, clusterCtx)
Expect(err).ToNot(HaveOccurred())
Expect(fds).To(BeNil())
})
})

Context("when AvailabilityZone and Zone co-exists", func() {
BeforeEach(func() {
azNames := []string{"az-1", "az-2"}
for _, name := range azNames {
az := &topologyv1.AvailabilityZone{
TypeMeta: metav1.TypeMeta{
APIVersion: topologyv1.GroupVersion.String(),
Kind: "AvailabilityZone",
},
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
}
Expect(controllerManagerContext.Client.Create(ctx, az)).To(Succeed())

}
zoneNames := []string{"zone-1", "zone-2", "zone-3"}
for _, name := range zoneNames {
zone := &topologyv1.Zone{
TypeMeta: metav1.TypeMeta{
APIVersion: topologyv1.GroupVersion.String(),
Kind: "Zone",
},
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: clusterCtx.VSphereCluster.Namespace,
},
}

Expect(controllerManagerContext.Client.Create(ctx, zone)).To(Succeed())
}
})

It("should discover FailureDomains using AvailabilityZone by default", func() {
fds, err := reconciler.getFailureDomains(ctx, clusterCtx)
Expect(err).ToNot(HaveOccurred())
Expect(fds).NotTo(BeNil())
Expect(fds).Should(HaveLen(2))
})

It("should discover FailureDomains using Zone when NamespaceScopedZone is enabled", func() {
defer utilfeature.SetFeatureGateDuringTest(GinkgoTB(), feature.Gates, feature.NamespaceScopedZone, true)()

fds, err := reconciler.getFailureDomains(ctx, clusterCtx)
Expect(err).ToNot(HaveOccurred())
Expect(fds).NotTo(BeNil())
Expect(fds).Should(HaveLen(3))
})
})

Context("when Zone is marked for deleteion", func() {
BeforeEach(func() {
zoneNames := []string{"zone-1", "zone-2", "zone-3"}
zoneNamespace := clusterCtx.VSphereCluster.Namespace
for _, name := range zoneNames {
zone := &topologyv1.Zone{
TypeMeta: metav1.TypeMeta{
APIVersion: topologyv1.GroupVersion.String(),
Kind: "Zone",
},
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: zoneNamespace,
Finalizers: []string{"zone.test.finalizer"},
},
}

Expect(controllerManagerContext.Client.Create(ctx, zone)).To(Succeed())

if name == "zone-3" {
// Delete the zone to set the deletion timestamp
Expect(controllerManagerContext.Client.Delete(ctx, zone)).To(Succeed())
Zone3 := &topologyv1.Zone{}
Expect(controllerManagerContext.Client.Get(ctx, client.ObjectKey{Namespace: zoneNamespace, Name: name}, Zone3)).To(Succeed())

// Validate the deletion timestamp
Expect(Zone3.DeletionTimestamp.IsZero()).To(BeFalse())
}
}

})

It("should discover FailureDomains using Zone and filter out Zone marked for deletion", func() {
defer utilfeature.SetFeatureGateDuringTest(GinkgoTB(), feature.Gates, feature.NamespaceScopedZone, true)()

fds, err := reconciler.getFailureDomains(ctx, clusterCtx)
Expect(err).ToNot(HaveOccurred())
Expect(fds).NotTo(BeNil())
Expect(fds).Should(HaveLen(2))
})

fds, err := reconciler.getFailureDomains(ctx)
Expect(err).ToNot(HaveOccurred())
Expect(fds).NotTo(BeNil())
Expect(fds).Should(HaveLen(3))
})

})
})
88 changes: 88 additions & 0 deletions external/tanzu-topology/api/v1alpha1/availability_zone.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

//nolint:revive
//nolint:stylecheck

Check failure on line 18 in external/tanzu-topology/api/v1alpha1/availability_zone.go

View workflow job for this annotation

GitHub Actions / lint

directive `//nolint:stylecheck` is unused for linter "stylecheck" (nolintlint)
// Package v1alpha1 contains the v1alpha1 API types of the vSphere topology API.
package v1alpha1

Check failure on line 19 in external/tanzu-topology/api/v1alpha1/availability_zone.go

View workflow job for this annotation

GitHub Actions / lint

ST1000: at least one file in a package should have a package comment (stylecheck)

import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// NamespaceInfo contains identifying information about the vSphere resources
// used to represent a Kubernetes namespace on individual vSphere Zones.
type NamespaceInfo struct {
// PoolMoId is the managed object ID of the vSphere ResourcePool for a
// Namespace on an individual vSphere Cluster.
PoolMoId string `json:"poolMoId,omitempty"` //nolint:stylecheck // renaming the field would change the exported API

Check failure on line 30 in external/tanzu-topology/api/v1alpha1/availability_zone.go

View workflow job for this annotation

GitHub Actions / lint

ST1003: struct field PoolMoId should be PoolMoID (stylecheck)

// PoolMoIDs are the managed object ID of the vSphere ResourcePools for a
// Namespace in an individual vSphere Zone. A zone may be comprised of
// multiple ResourcePools.
PoolMoIDs []string `json:"poolMoIDs,omitempty"`

// FolderMoId is the managed object ID of the vSphere Folder for a
// Namespace. Folders are global and not per-vSphere Cluster, but the
// FolderMoId is stored here, alongside the PoolMoId for convenience.
FolderMoId string `json:"folderMoId,omitempty"` //nolint:stylecheck // renaming the field would change the exported API

Check failure on line 40 in external/tanzu-topology/api/v1alpha1/availability_zone.go

View workflow job for this annotation

GitHub Actions / lint

ST1003: struct field FolderMoId should be FolderMoID (stylecheck)
}

// AvailabilityZoneSpec defines the desired state of AvailabilityZone.
type AvailabilityZoneSpec struct {
// ClusterComputeResourceMoId is the managed object ID of the vSphere
// ClusterComputeResource represented by this availability zone.
ClusterComputeResourceMoId string `json:"clusterComputeResourceMoId,omitempty"` //nolint:stylecheck // renaming the field would change the exported API

Check failure on line 47 in external/tanzu-topology/api/v1alpha1/availability_zone.go

View workflow job for this annotation

GitHub Actions / lint

ST1003: struct field ClusterComputeResourceMoId should be ClusterComputeResourceMoID (stylecheck)

// ClusterComputeResourceMoIDs are the managed object IDs of the vSphere
// ClusterComputeResources represented by this availability zone.
ClusterComputeResourceMoIDs []string `json:"clusterComputeResourceMoIDs,omitempty"`

// Namespaces is a map that enables querying information about the vSphere
// objects that make up a Kubernetes Namespace based on its name.
Namespaces map[string]NamespaceInfo `json:"namespaces,omitempty"`
}

// AvailabilityZoneStatus defines the observed state of AvailabilityZone.
// It currently declares no fields.
type AvailabilityZoneStatus struct {
}

// AvailabilityZone is the schema for the AvailabilityZone resource for the
// vSphere topology API.
//
// +kubebuilder:object:root=true
// +kubebuilder:resource:path=availabilityzones,scope=Cluster,shortName=az
// +kubebuilder:storageversion
// +kubebuilder:subresource:status
type AvailabilityZone struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`

// Spec is the desired state of the availability zone.
Spec AvailabilityZoneSpec `json:"spec,omitempty"`
// Status is the observed state of the availability zone.
Status AvailabilityZoneStatus `json:"status,omitempty"`
}

// AvailabilityZoneList contains a list of AvailabilityZone resources.
//
// +kubebuilder:object:root=true
type AvailabilityZoneList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty"`
// Items holds the AvailabilityZone resources in this list.
Items []AvailabilityZone `json:"items"`
}

// init registers the AvailabilityZone and AvailabilityZoneList types with the
// package's SchemeBuilder.
func init() {
	SchemeBuilder.Register(
		new(AvailabilityZone),
		new(AvailabilityZoneList),
	)
}
Loading

0 comments on commit e19a5cd

Please sign in to comment.