Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: imagePullJob support Tolerations #1705

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions apis/apps/v1alpha1/imagepulljob_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package v1alpha1

import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
)
Expand Down Expand Up @@ -63,6 +64,10 @@ type ImagePullJobTemplate struct {
// +optional
PodSelector *ImagePullJobPodSelector `json:"podSelector,omitempty"`

// Tolerations allow image pull to be scheduled onto nodes with specific taints
// +optional
Tolerations []v1.Toleration `json:"tolerations,omitempty"`

// Parallelism is the requested parallelism, it can be set to any non-negative value. If it is unspecified,
// it defaults to 1. If it is specified as 0, then the Job is effectively paused until it is increased.
// +optional
Expand Down
7 changes: 7 additions & 0 deletions apis/apps/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 40 additions & 0 deletions config/crd/bases/apps.kruise.io_imagelistpulljobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,46 @@ spec:
type: array
type: object
x-kubernetes-map-type: atomic
tolerations:
description: Tolerations allow image pull to be scheduled onto nodes
with specific taints
items:
description: |-
The pod this Toleration is attached to tolerates any taint that matches
the triple <key,value,effect> using the matching operator <operator>.
properties:
effect:
description: |-
Effect indicates the taint effect to match. Empty means match all taint effects.
When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
type: string
key:
description: |-
Key is the taint key that the toleration applies to. Empty means match all taint keys.
If the key is empty, operator must be Exists; this combination means to match all values and all keys.
type: string
operator:
description: |-
Operator represents a key's relationship to the value.
Valid operators are Exists and Equal. Defaults to Equal.
Exists is equivalent to wildcard for value, so that a pod can
tolerate all taints of a particular category.
type: string
tolerationSeconds:
description: |-
TolerationSeconds represents the period of time the toleration (which must be
of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
it is not set, which means tolerate the taint forever (do not evict). Zero and
negative values will be treated as 0 (evict immediately) by the system.
format: int64
type: integer
value:
description: |-
Value is the taint value the toleration matches to.
If the operator is Exists, the value should be empty, otherwise just a regular string.
type: string
type: object
type: array
required:
- completionPolicy
- images
Expand Down
40 changes: 40 additions & 0 deletions config/crd/bases/apps.kruise.io_imagepulljobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,46 @@ spec:
type: array
type: object
x-kubernetes-map-type: atomic
tolerations:
description: Tolerations allow image pull to be scheduled onto nodes
with specific taints
items:
description: |-
The pod this Toleration is attached to tolerates any taint that matches
the triple <key,value,effect> using the matching operator <operator>.
properties:
effect:
description: |-
Effect indicates the taint effect to match. Empty means match all taint effects.
When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
type: string
key:
description: |-
Key is the taint key that the toleration applies to. Empty means match all taint keys.
If the key is empty, operator must be Exists; this combination means to match all values and all keys.
type: string
operator:
description: |-
Operator represents a key's relationship to the value.
Valid operators are Exists and Equal. Defaults to Equal.
Exists is equivalent to wildcard for value, so that a pod can
tolerate all taints of a particular category.
type: string
tolerationSeconds:
description: |-
TolerationSeconds represents the period of time the toleration (which must be
of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
it is not set, which means tolerate the taint forever (do not evict). Zero and
negative values will be treated as 0 (evict immediately) by the system.
format: int64
type: integer
value:
description: |-
Value is the taint value the toleration matches to.
If the operator is Exists, the value should be empty, otherwise just a regular string.
type: string
type: object
type: array
required:
- completionPolicy
- image
Expand Down
7 changes: 7 additions & 0 deletions pkg/controller/imagepulljob/imagepulljob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,13 @@
if err != nil {
return reconcile.Result{}, fmt.Errorf("failed to get NodeImages: %v", err)
}
// Filter nodeImage by Tolerations when featureGate is true.
if utilfeature.DefaultFeatureGate.Enabled(features.ImagePullJobTolerationGate) {
nodeImages, err = utilimagejob.TolerationNodeImages(r.Client, nodeImages, job)
if err != nil {
return reconcile.Result{}, fmt.Errorf("failed to get NodeImages for Toleration: %v", err)

Check warning on line 239 in pkg/controller/imagepulljob/imagepulljob_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/imagepulljob/imagepulljob_controller.go#L236-L239

Added lines #L236 - L239 were not covered by tests
}
}

// If resourceVersion expectations have not satisfied yet, just skip this reconcile
for _, nodeImage := range nodeImages {
Expand Down
4 changes: 4 additions & 0 deletions pkg/features/kruise_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ const (
// ImagePullJobGate enable imagepulljob-controller execute ImagePullJob.
ImagePullJobGate featuregate.Feature = "ImagePullJobGate"

// ImagePullJobTolerationGate enables filtering of ImagePullJob target nodes by the job's Tolerations.
ImagePullJobTolerationGate featuregate.Feature = "ImagePullJobTolerationGate"

// ResourceDistributionGate enable resourcedistribution-controller execute ResourceDistribution.
ResourceDistributionGate featuregate.Feature = "ResourceDistributionGate"

Expand Down Expand Up @@ -157,6 +160,7 @@ var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
CloneSetEventHandlerOptimization: {Default: false, PreRelease: featuregate.Alpha},
PreparingUpdateAsUpdate: {Default: false, PreRelease: featuregate.Alpha},
ImagePullJobGate: {Default: false, PreRelease: featuregate.Alpha},
ImagePullJobTolerationGate: {Default: false, PreRelease: featuregate.Alpha},
ResourceDistributionGate: {Default: false, PreRelease: featuregate.Alpha},
DeletionProtectionForCRDCascadingGate: {Default: false, PreRelease: featuregate.Alpha},

Expand Down
41 changes: 41 additions & 0 deletions pkg/util/imagejob/imagejob_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,47 @@
return convertNodeImages(nodeImageList), err
}

func TolerationNodeImages(reader client.Reader, nodeImages []*appsv1alpha1.NodeImage, job *appsv1alpha1.ImagePullJob) (tolerationNodeImage []*appsv1alpha1.NodeImage, err error) {
for _, ng := range nodeImages {
var node v1.Node
if err = reader.Get(context.TODO(), types.NamespacedName{Name: ng.Name}, &node); err != nil {
if errors.IsNotFound(err) {
tolerationNodeImage = append(tolerationNodeImage, ng)
continue

Check warning on line 162 in pkg/util/imagejob/imagejob_reader.go

View check run for this annotation

Codecov / codecov/patch

pkg/util/imagejob/imagejob_reader.go#L160-L162

Added lines #L160 - L162 were not covered by tests
}
return nil, fmt.Errorf("get specific Node %s error: %v", ng.Name, err)

Check warning on line 164 in pkg/util/imagejob/imagejob_reader.go

View check run for this annotation

Codecov / codecov/patch

pkg/util/imagejob/imagejob_reader.go#L164

Added line #L164 was not covered by tests
}
if nodeMatchesTolerations(node, job.Spec.Tolerations) {
tolerationNodeImage = append(tolerationNodeImage, ng)
}
}
return
}

// nodeMatchesTolerations reports whether every taint in node.Spec.Taints is
// tolerated by at least one entry of tolerations. A node without taints always
// matches.
func nodeMatchesTolerations(node v1.Node, tolerations []v1.Toleration) bool {
	taints := node.Spec.Taints
	for i := 0; i < len(taints); i++ {
		if tolerationToleratesTaint(tolerations, taints[i]) {
			continue
		}
		// A single untolerated taint is enough to reject the node.
		return false
	}
	return true
}

// tolerationToleratesTaint reports whether at least one of tolerations
// tolerates taint.
//
// Matching follows the Toleration semantics described in the API:
//   - an empty Effect matches every taint effect;
//   - an empty Key matches every taint key (meaningful together with Exists);
//   - an empty Operator is treated as Equal;
//   - Exists ignores the value, Equal requires the values to be identical.
//
// Tolerations with any other operator never match.
func tolerationToleratesTaint(tolerations []v1.Toleration, taint v1.Taint) bool {
	for i := range tolerations {
		toleration := &tolerations[i]

		if len(toleration.Effect) > 0 && toleration.Effect != taint.Effect {
			continue
		}
		if len(toleration.Key) > 0 && toleration.Key != taint.Key {
			continue
		}

		switch toleration.Operator {
		case "", v1.TolerationOpEqual:
			// An unset operator defaults to Equal.
			if toleration.Value == taint.Value {
				return true
			}
		case v1.TolerationOpExists:
			return true
		}
	}
	return false
}

func convertNodeImages(nodeImageList *appsv1alpha1.NodeImageList) []*appsv1alpha1.NodeImage {
nodeImages := make([]*appsv1alpha1.NodeImage, 0, len(nodeImageList.Items))
for i := range nodeImageList.Items {
Expand Down
Loading
Loading