Skip to content

Commit

Permalink
Nodereaper skip label (#32)
Browse files Browse the repository at this point in the history
* added node label checks that can be used to skip node reaping

* simplify SkipLabel type usage to use existing string label constants, moved node label creation to ReaperUnitTest field to remove duplicate code

* moved node labels to FakeNode struct, createNodeLabels will combine the node type label (master/slave) to the list of nodes specified in FakeNode.nodeLabels

* updated skipLabelTest's to have skip labels with false to ensure those still get reaped
  • Loading branch information
pyieh committed May 9, 2020
1 parent b7f60a8 commit 4a20228
Show file tree
Hide file tree
Showing 3 changed files with 261 additions and 16 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ cover.html
_output/*
bin
bin/*
.DS_Store
.DS_Store
.idea/*
27 changes: 18 additions & 9 deletions pkg/reaper/nodereaper/nodereaper.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,15 @@ import (
var log = logrus.New()

const (
ageUnreapableAnnotationKey = "governor.keikoproj.io/age-unreapable"
stateAnnotationKey = "governor.keikoproj.io/state"
terminatedStateName = "termination-issued"
drainingStateName = "draining"
ageUnreapableAnnotationKey = "governor.keikoproj.io/age-unreapable"
stateAnnotationKey = "governor.keikoproj.io/state"
terminatedStateName = "termination-issued"
drainingStateName = "draining"
reaperDisableLabelKey = "governor.keikoproj.io/node-reaper-disabled"
reapUnreadyDisabledLabelKey = "governor.keikoproj.io/reap-unready-disabled"
reapUnknownDisabledLabelKey = "governor.keikoproj.io/reap-unknown-disabled"
reapFlappyDisabledLabelKey = "governor.keikoproj.io/reap-flappy-disabled"
reapOldDisabledLabelKey = "governor.keikoproj.io/reap-old-disabled"
)

// Validate command line arguments
Expand Down Expand Up @@ -293,7 +298,7 @@ func (ctx *ReaperContext) deriveAgeDrainReapableNodes() error {

// Drain-Reap old nodes
if ctx.ReapOld {
if !nodeHasAnnotation(node, ageUnreapableAnnotationKey, "true") {
if !nodeHasAnnotation(node, ageUnreapableAnnotationKey, "true") && !hasSkipLabel(node, reapOldDisabledLabelKey){
if nodeIsAgeReapable(nodeAgeMinutes, ageThreshold) {
log.Infof("node %v is drain-reapable !! State = OldAge, Diff = %v/%v", nodeName, nodeAgeMinutes, ageThreshold)
ctx.addAgeDrainReapable(nodeName, nodeInstanceID, nodeAgeMinutes)
Expand Down Expand Up @@ -321,7 +326,7 @@ func (ctx *ReaperContext) deriveFlappyDrainReapableNodes() error {

// Drain-Reap flappy nodes
if ctx.ReapFlappy {
if nodeIsFlappy(events, nodeName, countThreshold, "NodeReady") {
if nodeIsFlappy(events, nodeName, countThreshold, "NodeReady") && !hasSkipLabel(node, reapFlappyDisabledLabelKey) {
log.Infof("node %v is drain-reapable !! State = ReadinessFlapping", nodeName)
ctx.addDrainable(nodeName, nodeInstanceID)
ctx.addReapable(nodeName, nodeInstanceID)
Expand Down Expand Up @@ -392,7 +397,7 @@ func (ctx *ReaperContext) deriveReapableNodes() error {
continue
}

if ctx.ReapUnready && nodeStateIsNotReady(&node) {
if ctx.ReapUnready && nodeStateIsNotReady(&node) && !hasSkipLabel(node, reapUnreadyDisabledLabelKey) {
if nodeMeetsReapAfterThreshold(ctx.TimeToReap, lastStateDurationIntervalMinutes) {
log.Infof("node %v is reapable !! State = NotReady, diff: %.2f/%v", nodeName, lastStateDurationIntervalMinutes, ctx.TimeToReap)
ctx.addReapable(nodeName, nodeInstanceID)
Expand All @@ -402,7 +407,7 @@ func (ctx *ReaperContext) deriveReapableNodes() error {
}
}

if ctx.ReapUnknown && nodeStateIsUnknown(&node) {
if ctx.ReapUnknown && nodeStateIsUnknown(&node) && !hasSkipLabel(node, reapUnknownDisabledLabelKey) {
if nodeMeetsReapAfterThreshold(ctx.TimeToReap, lastStateDurationIntervalMinutes) {
log.Infof("node %v is reapable !! State = Unknown, diff: %.2f/%v", nodeName, lastStateDurationIntervalMinutes, ctx.TimeToReap)
ctx.addReapable(nodeName, nodeInstanceID)
Expand Down Expand Up @@ -620,7 +625,7 @@ func (ctx *ReaperContext) scan(w ReaperAwsAuth) error {
log.Infof("found %v nodes, %v pods, and %v events", len(ctx.AllNodes), len(ctx.AllPods), len(ctx.AllEvents))
for _, node := range nodeList.Items {
ctx.NodeInstanceIDs[getNodeInstanceID(&node)] = node.Name
if nodeStateIsNotReady(&node) || nodeStateIsUnknown(&node) {
if (nodeStateIsNotReady(&node) || nodeStateIsUnknown(&node)) {
log.Infof("node %v is not ready", node.ObjectMeta.Name)
ctx.UnreadyNodes = append(ctx.UnreadyNodes, node)
}
Expand Down Expand Up @@ -736,3 +741,7 @@ func nodeIsFlappy(events []v1.Event, name string, threshold int32, reason string
}
return false
}

func hasSkipLabel(node v1.Node, label string) bool {
return node.ObjectMeta.Labels[reaperDisableLabelKey] == "true" || node.ObjectMeta.Labels[label] == "true"
}
247 changes: 241 additions & 6 deletions pkg/reaper/nodereaper/nodereaper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,14 +217,25 @@ func loadFakeAPI(ctx *ReaperContext) {
}
}

func createNodeLabels(node FakeNode, ctx *ReaperContext) map[string]string {
nodeLabels := make(map[string]string)
if node.isMaster {
nodeLabels["kubernetes.io/role"] = "master"
} else {
nodeLabels["kubernetes.io/role"] = "node"
}

for key, value := range node.nodeLabels {
nodeLabels[key] = value
}

return nodeLabels
}


func createFakeNodes(nodes []FakeNode, ctx *ReaperContext) {
for _, n := range nodes {
nodeLabels := make(map[string]string)
if n.isMaster {
nodeLabels["kubernetes.io/role"] = "master"
} else {
nodeLabels["kubernetes.io/role"] = "node"
}
nodeLabels := createNodeLabels(n, ctx)

creationTimestamp := metav1.Time{Time: time.Now()}
if n.ageMinutes != 0 {
Expand Down Expand Up @@ -463,6 +474,7 @@ func (u *ReaperUnitTest) Run(t *testing.T, timeTest bool) {
}
}


type ReaperUnitTest struct {
TestDescription string
Nodes []FakeNode
Expand Down Expand Up @@ -497,6 +509,7 @@ type FakeNode struct {
unreadyPods int
lostPods int
ageMinutes int
nodeLabels map[string]string
}
type FakePod struct {
podName string
Expand Down Expand Up @@ -1391,3 +1404,225 @@ func TestProviderIDParser(t *testing.T) {
t.Fatalf("expected Region: %v, got: %v", expectedRegion, providerRegion)
}
}

func TestSkipLabelReaper(t *testing.T) {
reaper := newFakeReaperContext()
reaper.ReapUnknown = true
reaper.ReapUnready = true
reaper.AsgValidation = true
reaper.FlapCount = 4

testCase := ReaperUnitTest{
TestDescription: "DisableReaper label detection - skip reaping of any node if it has this label with value 'true'",
InstanceGroup: FakeASG{
Name: "my-ig.cluster.k8s.local",
Healthy: 2,
Unhealthy: 0,
Desired: 2,
},
Nodes: []FakeNode{
{
nodeName: "node-old",
state: "Ready",
ageMinutes: 43100,
nodeLabels: map[string]string{reaperDisableLabelKey: "true"},
},
{
nodeName: "node-flappy",
state: "Ready",
nodeLabels: map[string]string{reaperDisableLabelKey: "true"},
},
{
nodeName: "node-unknown",
state: "Unknown",
lastTransitionMinutes: 6,
nodeLabels: map[string]string{reaperDisableLabelKey: "true"},
},
{
nodeName: "node-unready",
state: "NotReady",
lastTransitionMinutes: 6,
nodeLabels: map[string]string{reaperDisableLabelKey: "true"},
},
},
Events: []FakeEvent{
{
node: "node-flappy",
count: 4,
reason: "NodeReady",
kind: "Node",
},
},
FakeReaper: reaper,
ExpectedUnready: 2,
ExpectedReapable: 0,
ExpectedDrainable: 0,
ExpectedOldReapable: 0,
ExpectedTerminated: 0,
ExpectedDrained: 0,
}
testCase.Run(t, false)
}

func TestSkipLabelUnknownNodes(t *testing.T) {
reaper := newFakeReaperContext()
reaper.ReapUnknown = true
reaper.AsgValidation = true

testCase := ReaperUnitTest{
TestDescription: "DisableUnknownReaper label Detection - unknown nodes with skip label as 'true' are skipped from being reaped",
InstanceGroup: FakeASG{
Name: "my-ig.cluster.k8s.local",
Healthy: 1,
Unhealthy: 0,
Desired: 1,
},
Nodes: []FakeNode{
{
nodeName: "node-unknown-1",
state: "Unknown",
lastTransitionMinutes: 6,
nodeLabels: map[string]string{reapUnknownDisabledLabelKey: "true"},
},
{
nodeName: "node-unknown-2",
state: "Unknown",
lastTransitionMinutes: 6,
nodeLabels: map[string]string{reapUnknownDisabledLabelKey: "false"},
},
},
FakeReaper: reaper,
ExpectedUnready: 2,
ExpectedReapable: 1,
ExpectedDrainable: 0,
ExpectedTerminated: 1,
ExpectedDrained: 0,
}
testCase.Run(t, false)
}

func TestSkipLabelUnreadyNodes(t *testing.T) {
reaper := newFakeReaperContext()
reaper.ReapUnready = true
reaper.AsgValidation = true

testCase := ReaperUnitTest{
TestDescription: "DisableUnreadyReaper label Detection - unready nodes with skip label as 'true' are skipped from being reaped",
InstanceGroup: FakeASG{
Name: "my-ig.cluster.k8s.local",
Healthy: 1,
Unhealthy: 0,
Desired: 1,
},
Nodes: []FakeNode{
{
nodeName: "node-unready-1",
state: "NotReady",
lastTransitionMinutes: 6,
nodeLabels: map[string]string{reapUnreadyDisabledLabelKey: "true"},
},
{
nodeName: "node-unready-2",
state: "NotReady",
lastTransitionMinutes: 6,
nodeLabels: map[string]string{reapUnreadyDisabledLabelKey: "false"},
},
},
FakeReaper: reaper,
ExpectedUnready: 2,
ExpectedReapable: 1,
ExpectedDrainable: 0,
ExpectedTerminated: 1,
ExpectedDrained: 0,
}
testCase.Run(t, false)
}

func TestSkipLabelOldNodes(t *testing.T) {
reaper := newFakeReaperContext()

testCase := ReaperUnitTest{
TestDescription: "DisableOldReaper label Detection - old nodes with skip label as 'true' are skipped from being reaped",
InstanceGroup: FakeASG{
Name: "my-ig.cluster.k8s.local",
Healthy: 2,
Unhealthy: 0,
Desired: 2,
},
Nodes: []FakeNode{
{
nodeName: "node-old-1",
state: "Ready",
ageMinutes: 43100,
nodeLabels: map[string]string{reapOldDisabledLabelKey: "true"},
},
{
nodeName: "node-old-2",
state: "Ready",
ageMinutes: 43100,
nodeLabels: map[string]string{reapOldDisabledLabelKey: "false"},
},
},
FakeReaper: reaper,
ExpectedUnready: 0,
ExpectedOldReapable: 1,
ExpectedTerminated: 1,
ExpectedDrained: 1,
}
testCase.Run(t, false)
}

func TestSkipLabelFlappyNodes(t *testing.T) {
reaper := newFakeReaperContext()
reaper.FlapCount = 4

testCase := ReaperUnitTest{
TestDescription: "DisableFlappyReaper label Detection - flappy nodes with skip label as 'true' are skipped from being reaped",
InstanceGroup: FakeASG{
Name: "my-ig.cluster.k8s.local",
Healthy: 2,
Unhealthy: 0,
Desired: 2,
},
Nodes: []FakeNode{
{
nodeName: "ip-10-10-10-10.us-west-2.compute.local",
state: "Ready",
nodeLabels: map[string]string{reapFlappyDisabledLabelKey: "true"},
},
{
nodeName: "ip-10-10-10-11.us-west-2.compute.local",
state: "Ready",
nodeLabels: map[string]string{reapFlappyDisabledLabelKey: "false"},
},
},
Events: []FakeEvent{
{
node: "ip-10-10-10-10.us-west-2.compute.local",
count: 3,
reason: "NodeReady",
kind: "Node",
},
{
node: "ip-10-10-10-10.us-west-2.compute.local",
count: 1,
reason: "NodeReady",
kind: "Node",
},
{
node: "ip-10-10-10-11.us-west-2.compute.local",
count: 4,
reason: "NodeReady",
kind: "Node",
},
},
FakeReaper: reaper,
ExpectedUnready: 0,
ExpectedReapable: 1,
ExpectedDrainable: 1,
ExpectedTerminated: 1,
ExpectedDrained: 1,
}
testCase.Run(t, false)

}

0 comments on commit 4a20228

Please sign in to comment.