Skip to content

Commit

Permalink
1. adapter.go: GetReplicaDetails returns pods in the subset
Browse files Browse the repository at this point in the history
2. xxx_adapter.go: return pods implementation ⬆️
3. allocator.go: about safeReplica
4. pod_condition_utils.go: extract the PodUnscheduledTimeout function from workloadspread
5. reschedule.go: PodUnscheduledTimeout function extracted
6. subset.go: add some fields to the Subset object to carry related information
7. subset_control.go: store subset pods to Subset object
8. uniteddeployment_controller.go
   1. add requeue feature to check failed pods
   2. subset unschedulable status management
9. uniteddeployment_types.go: API change
10. uniteddeployment_update.go: sync unschedulable to CR

Signed-off-by: AiRanthem <[email protected]>
  • Loading branch information
AiRanthem committed Sep 9, 2024
1 parent 4918768 commit cba277a
Show file tree
Hide file tree
Showing 23 changed files with 1,017 additions and 76 deletions.
100 changes: 99 additions & 1 deletion apis/apps/v1alpha1/uniteddeployment_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ limitations under the License.
package v1alpha1

import (
"strconv"
"time"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -165,6 +168,10 @@ type Topology struct {
// +patchStrategy=merge
// +optional
Subsets []Subset `json:"subsets,omitempty" patchStrategy:"merge" patchMergeKey:"name"`

// ScheduleStrategy indicates the strategy the UnitedDeployment used to perform the schedule between each of subsets.
// +optional
ScheduleStrategy UnitedDeploymentScheduleStrategy `json:"scheduleStrategy,omitempty"`
}

// Subset defines the detail of a subset.
Expand Down Expand Up @@ -218,6 +225,69 @@ type Subset struct {
Patch runtime.RawExtension `json:"patch,omitempty"`
}

// UnitedDeploymentScheduleStrategyType is a string enumeration type that enumerates
// all possible schedule strategies for the UnitedDeployment controller.
// The validation marker below restricts accepted values to "Adaptive", "Fixed"
// or the empty string.
// +kubebuilder:validation:Enum=Adaptive;Fixed;""
type UnitedDeploymentScheduleStrategyType string

const (
// AdaptiveUnitedDeploymentScheduleStrategyType represents that when a pod is stuck in the pending status and cannot
// be scheduled, it is allowed to be rescheduled to another subset.
AdaptiveUnitedDeploymentScheduleStrategyType UnitedDeploymentScheduleStrategyType = "Adaptive"
// FixedUnitedDeploymentScheduleStrategyType represents that pods are strictly scheduled to the selected subset
// even if scheduling fails.
FixedUnitedDeploymentScheduleStrategyType UnitedDeploymentScheduleStrategyType = "Fixed"
)

const (
// DefaultRescheduleCriticalDuration is the value returned by GetRescheduleCriticalDuration
// when Adaptive.RescheduleCriticalSeconds is not set (30 seconds).
DefaultRescheduleCriticalDuration = 30 * time.Second
// DefaultUnschedulableStatusLastDuration is the value returned by GetUnschedulableLastDuration
// when Adaptive.UnschedulableLastSeconds is not set (300 seconds).
DefaultUnschedulableStatusLastDuration = 300 * time.Second
)

// AdaptiveUnitedDeploymentStrategy is used to communicate parameters when Type is AdaptiveUnitedDeploymentScheduleStrategyType.
type AdaptiveUnitedDeploymentStrategy struct {
// RescheduleCriticalSeconds indicates how long the controller waits before rescheduling a Pod that failed to be
// scheduled to a subset that has redundant capacity after the subset where the Pod lives. If a Pod failed to be
// scheduled and is still in an unschedulable status after RescheduleCriticalSeconds, the controller will
// reschedule it to a suitable subset. Default is 30 seconds.
// +optional
RescheduleCriticalSeconds *int32 `json:"rescheduleCriticalSeconds,omitempty"`

// UnschedulableLastSeconds is used to set the number of seconds for a Subset to recover from an unschedulable state,
// with a default value of 300 seconds.
// +optional
UnschedulableLastSeconds *int32 `json:"unschedulableLastSeconds,omitempty"`
}

// UnitedDeploymentScheduleStrategy defines the scheduling behavior of UnitedDeployment:
// the strategy type plus the optional parameters used by the Adaptive type.
type UnitedDeploymentScheduleStrategy struct {
// Type indicates the type of the UnitedDeploymentScheduleStrategy.
// Default is Fixed.
// +optional
Type UnitedDeploymentScheduleStrategyType `json:"type,omitempty"`

// Adaptive is used to communicate parameters when Type is AdaptiveUnitedDeploymentScheduleStrategyType.
// When it is nil, the duration getters on this type return their default values.
// +optional
Adaptive *AdaptiveUnitedDeploymentStrategy `json:"adaptive,omitempty"`
}

// IsAdaptive reports whether the strategy Type is
// AdaptiveUnitedDeploymentScheduleStrategyType. Any other value, including
// the empty string, yields false.
func (s *UnitedDeploymentScheduleStrategy) IsAdaptive() bool {
	strategyType := s.Type
	return strategyType == AdaptiveUnitedDeploymentScheduleStrategyType
}

// GetRescheduleCriticalDuration returns Adaptive.RescheduleCriticalSeconds as a
// time.Duration. When Adaptive or RescheduleCriticalSeconds is nil it returns
// DefaultRescheduleCriticalDuration.
func (s *UnitedDeploymentScheduleStrategy) GetRescheduleCriticalDuration() time.Duration {
	if adaptive := s.Adaptive; adaptive != nil && adaptive.RescheduleCriticalSeconds != nil {
		seconds := *adaptive.RescheduleCriticalSeconds
		return time.Duration(seconds) * time.Second
	}
	return DefaultRescheduleCriticalDuration
}

// GetUnschedulableLastDuration returns Adaptive.UnschedulableLastSeconds as a
// time.Duration. When Adaptive or UnschedulableLastSeconds is nil it returns
// DefaultUnschedulableStatusLastDuration.
func (s *UnitedDeploymentScheduleStrategy) GetUnschedulableLastDuration() time.Duration {
	if adaptive := s.Adaptive; adaptive != nil && adaptive.UnschedulableLastSeconds != nil {
		seconds := *adaptive.UnschedulableLastSeconds
		return time.Duration(seconds) * time.Second
	}
	return DefaultUnschedulableStatusLastDuration
}

// UnitedDeploymentStatus defines the observed state of UnitedDeployment.
type UnitedDeploymentStatus struct {
// ObservedGeneration is the most recent generation observed for this UnitedDeployment. It corresponds to the
Expand Down Expand Up @@ -252,6 +322,10 @@ type UnitedDeploymentStatus struct {
// +optional
SubsetReplicas map[string]int32 `json:"subsetReplicas,omitempty"`

// Record whether each subset is unschedulable.
// +optional
SubsetUnschedulable *SubsetUnschedulable `json:"subsetUnschedulable,omitempty"`

// Represents the latest available observations of a UnitedDeployment's current state.
// +optional
Conditions []UnitedDeploymentCondition `json:"conditions,omitempty"`
Expand All @@ -278,7 +352,7 @@ type UnitedDeploymentCondition struct {
// The reason for the condition's last transition.
Reason string `json:"reason,omitempty"`

// A human readable message indicating details about the transition.
// A human-readable message indicating details about the transition.
Message string `json:"message,omitempty"`
}

Expand All @@ -293,6 +367,30 @@ type UpdateStatus struct {
CurrentPartitions map[string]int32 `json:"currentPartitions,omitempty"`
}

// SubsetUnschedulable records the unschedulable status of subsets together
// with a version number for the record.
type SubsetUnschedulable struct {
// Version is the integer version of this record; GetVersion renders it as a
// string and NotOlderThan compares it against a version string.
Version int `json:"version"`
// Status holds one UnschedulableStatus per map key.
// NOTE(review): the key is presumably the subset name — confirm against the controller.
Status map[string]UnschedulableStatus `json:"status"`
}

// GetVersion returns Version formatted as a base-10 string.
func (s *SubsetUnschedulable) GetVersion() string {
	version := int64(s.Version)
	return strconv.FormatInt(version, 10)
}

// NotOlderThan reports whether s.Version is greater than or equal to the
// integer parsed from version. It returns false when version cannot be
// parsed as an integer.
func (s *SubsetUnschedulable) NotOlderThan(version string) bool {
	other, err := strconv.Atoi(version)
	if err != nil {
		return false
	}
	return s.Version >= other
}

// UnschedulableStatus describes whether a single subset is unschedulable.
type UnschedulableStatus struct {
// Unschedulable is true when the subset is marked as unschedulable.
Unschedulable bool `json:"unschedulable"`
// UnschedulableTimestamp is an optional timestamp associated with the unschedulable state.
// NOTE(review): presumably the time the subset was marked unschedulable — confirm against the controller.
// +optional
UnschedulableTimestamp *metav1.Time `json:"unschedulableTimestamp,omitempty"`
// PendingPods is a pod counter that is excluded from JSON serialization by its `json:"-"` tag,
// so it exists only in memory and is not part of the persisted status.
// +optional
PendingPods int32 `json:"-"`
}

// +genclient
// +genclient:method=GetScale,verb=get,subresource=scale,result=k8s.io/api/autoscaling/v1.Scale
// +genclient:method=UpdateScale,verb=update,subresource=scale,input=k8s.io/api/autoscaling/v1.Scale,result=k8s.io/api/autoscaling/v1.Scale
Expand Down
92 changes: 92 additions & 0 deletions apis/apps/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

55 changes: 54 additions & 1 deletion config/crd/bases/apps.kruise.io_uniteddeployments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -955,6 +955,38 @@ spec:
description: Topology describes the pods distribution detail between
each of subsets.
properties:
scheduleStrategy:
description: ScheduleStrategy indicates the strategy the UnitedDeployment
used to perform the schedule between each of subsets.
properties:
adaptive:
description: Adaptive is used to communicate parameters when
Type is AdaptiveUnitedDeploymentScheduleStrategyType.
properties:
rescheduleCriticalSeconds:
description: |-
RescheduleCriticalSeconds indicates how long controller will reschedule a schedule failed Pod to the subset that has
redundant capacity after the subset where the Pod lives. If a Pod was scheduled failed and still in an unschedulable status
over RescheduleCriticalSeconds duration, the controller will reschedule it to a suitable subset. Default is 30 seconds.
format: int32
type: integer
unschedulableLastSeconds:
description: |-
UnschedulableLastSeconds is used to set the number of seconds for a Subset to recover from an unschedulable state,
with a default value of 300 seconds.
format: int32
type: integer
type: object
type:
description: |-
Type indicates the type of the UnitedDeploymentScheduleStrategy.
Default is Fixed
enum:
- Adaptive
- Fixed
- ""
type: string
type: object
subsets:
description: |-
Contains the details of each subset. Each element in this array represents one subset
Expand Down Expand Up @@ -1173,7 +1205,7 @@ spec:
format: date-time
type: string
message:
description: A human readable message indicating details about
description: A human-readable message indicating details about
the transition.
type: string
reason:
Expand Down Expand Up @@ -1216,6 +1248,27 @@ spec:
description: Records the topology detail information of the replicas
of each subset.
type: object
subsetUnschedulable:
description: Record whether each subset is unschedulable.
properties:
status:
additionalProperties:
properties:
unschedulable:
type: boolean
unschedulableTimestamp:
format: date-time
type: string
required:
- unschedulable
type: object
type: object
version:
type: integer
required:
- status
- version
type: object
updateStatus:
description: Records the information of update progress.
properties:
Expand Down
8 changes: 5 additions & 3 deletions pkg/controller/uniteddeployment/adapter/adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package adapter

import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -25,14 +26,15 @@ import (
)

type Adapter interface {
// NewResourceObject creates a empty subset object.
// NewResourceObject creates an empty subset object.
NewResourceObject() client.Object
// NewResourceListObject creates a empty subset list object.
// NewResourceListObject creates an empty subset list object.
NewResourceListObject() client.ObjectList
// GetStatusObservedGeneration returns the observed generation of the subset.
GetStatusObservedGeneration(subset metav1.Object) int64
// GetReplicaDetails returns the replicas information of the subset status.
GetReplicaDetails(subset metav1.Object, updatedRevision string) (specReplicas, specPartition *int32, statusReplicas, statusReadyReplicas, statusUpdatedReplicas, statusUpdatedReadyReplicas int32, err error)
GetReplicaDetails(subset metav1.Object, updatedRevision string) (specReplicas, specPartition *int32, statusReplicas,
statusReadyReplicas, statusUpdatedReplicas, statusUpdatedReadyReplicas int32, pods []*corev1.Pod, err error)
// GetSubsetFailure returns failure information of the subset.
GetSubsetFailure() *string
// ApplySubsetTemplate updates the subset to the latest revision.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,8 @@ func (a *AdvancedStatefulSetAdapter) GetStatusObservedGeneration(obj metav1.Obje
}

// GetReplicaDetails returns the replicas detail the subset needs.
func (a *AdvancedStatefulSetAdapter) GetReplicaDetails(obj metav1.Object, updatedRevision string) (specReplicas, specPartition *int32, statusReplicas, statusReadyReplicas, statusUpdatedReplicas, statusUpdatedReadyReplicas int32, err error) {
func (a *AdvancedStatefulSetAdapter) GetReplicaDetails(obj metav1.Object, updatedRevision string) (specReplicas, specPartition *int32, statusReplicas, statusReadyReplicas, statusUpdatedReplicas, statusUpdatedReadyReplicas int32, pods []*corev1.Pod, err error) {
set := obj.(*v1beta1.StatefulSet)
var pods []*corev1.Pod
pods, err = a.getStatefulSetPods(set)
if err != nil {
return
Expand Down
Loading

0 comments on commit cba277a

Please sign in to comment.