From ed556b3029c8f5fe73b3208e29da0216060df424 Mon Sep 17 00:00:00 2001 From: razo7 Date: Tue, 25 Jul 2023 17:52:16 +0300 Subject: [PATCH 1/4] Support only reboot action Raise a warning when a different action was asked, and either way try to reboot the node --- .../fenceagentsremediation_controller.go | 15 ++++++++- .../fenceagentsremediation_controller_test.go | 33 ++++++++++++++++--- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/controllers/fenceagentsremediation_controller.go b/controllers/fenceagentsremediation_controller.go index a021b7ad..20dda578 100644 --- a/controllers/fenceagentsremediation_controller.go +++ b/controllers/fenceagentsremediation_controller.go @@ -38,6 +38,8 @@ const ( errorMissingParams = "nodeParameters or sharedParameters or both are missing, and they cannot be empty" errorMissingNodeParams = "node parameter is required, and cannot be empty" SuccessFAResponse = "Success: Rebooted" + parameterActionName = "--action" + parameterActionValue = "reboot" ) // FenceAgentsRemediationReconciler reconciles a FenceAgentsRemediation object @@ -172,10 +174,21 @@ func buildFenceAgentParams(far *v1alpha1.FenceAgentsRemediation) ([]string, erro return nil, errors.New(errorMissingParams) } var fenceAgentParams []string + logger := ctrl.Log.WithName("build-fa-parameters") + // add shared parameters except the action parameter for paramName, paramVal := range far.Spec.SharedParameters { - fenceAgentParams = appendParamToSlice(fenceAgentParams, paramName, paramVal) + if paramName == parameterActionName { + if paramVal != parameterActionValue { + logger.Info("FAR doesn't support any other action than reboot", "action", paramVal) + } + } else { + fenceAgentParams = appendParamToSlice(fenceAgentParams, paramName, paramVal) + } } + // ensure the FA uses the reboot action + fenceAgentParams = appendParamToSlice(fenceAgentParams, parameterActionName, parameterActionValue) + // append node parameters nodeName := v1alpha1.NodeName(far.Name) for paramName, nodeMap := range far.Spec.NodeParameters { if nodeVal, isFound := nodeMap[nodeName]; isFound { diff --git a/controllers/fenceagentsremediation_controller_test.go b/controllers/fenceagentsremediation_controller_test.go index 951af145..9038c1c0 100644 --- a/controllers/fenceagentsremediation_controller_test.go +++ b/controllers/fenceagentsremediation_controller_test.go @@ -52,6 +52,13 @@ var _ = Describe("FAR Controller", func() { node *corev1.Node ) + invalidShareParam := map[v1alpha1.ParameterName]string{ + "--username": "admin", + "--password": "password", + "--action": "", + "--ip": "192.168.111.1", + "--lanplus": "", + } testShareParam := map[v1alpha1.ParameterName]string{ "--username": "admin", "--password": "password", @@ -75,6 +82,17 @@ var _ = Describe("FAR Controller", func() { Context("Functionality", func() { Context("buildFenceAgentParams", func() { + When("FAR include different action than reboot", func() { + It("should succeed with a warning", func() { + invalidValTestFAR := getFenceAgentsRemediation(node01, fenceAgentIPMI, invalidShareParam, testNodeParam) + invalidShareString, err := buildFenceAgentParams(invalidValTestFAR) + Expect(err).NotTo(HaveOccurred()) + validShareString, err := buildFenceAgentParams(underTestFAR) + Expect(err).NotTo(HaveOccurred()) + // Eventually buildFenceAgentParams would return the same shareParam + Expect(isEqualStringLists(invalidShareString, validShareString)).To(BeTrue()) + }) + }) When("FAR CR's name doesn't match a node name", func() { It("should fail", func() { underTestFAR.ObjectMeta.Name = dummyNode @@ -153,8 +171,8 @@ var _ = Describe("FAR Controller", func() { }) }) -// newFenceAgentsRemediation assigns the input to the FenceAgentsRemediation -func getFenceAgentsRemediation(nodeName string, agent string, sharedparameters map[v1alpha1.ParameterName]string, nodeparameters map[v1alpha1.ParameterName]map[v1alpha1.NodeName]string) *v1alpha1.FenceAgentsRemediation { +// getFenceAgentsRemediation assigns the input to the FenceAgentsRemediation +func getFenceAgentsRemediation(nodeName, agent string, sharedparameters map[v1alpha1.ParameterName]string, nodeparameters map[v1alpha1.ParameterName]map[v1alpha1.NodeName]string) *v1alpha1.FenceAgentsRemediation { return &v1alpha1.FenceAgentsRemediation{ ObjectMeta: metav1.ObjectMeta{Name: nodeName, Namespace: defaultNamespace}, Spec: v1alpha1.FenceAgentsRemediationSpec{ @@ -179,6 +197,13 @@ func buildFarPod() *corev1.Pod { return fenceAgentsPod } +// isEqualStringLists return true if two string lists share the same values +func isEqualStringLists(s1, s2 []string) bool { + sort.Strings(s1) + sort.Strings(s2) + return reflect.DeepEqual(s1, s2) +} + // cliCommandsEquality creates the command for CLI and compares it with the production command func cliCommandsEquality(far *v1alpha1.FenceAgentsRemediation) (bool, error) { if mocksExecuter.command == nil { @@ -190,9 +215,7 @@ func cliCommandsEquality(far *v1alpha1.FenceAgentsRemediation) (bool, error) { expectedCommand := []string{fenceAgentIPMI, "--lanplus", "--password=password", "--username=admin", "--action=reboot", "--ip=192.168.111.1", "--ipport=6233"} fmt.Printf("%s is the command from production environment, and %s is the hardcoded expected command from test environment.\n", mocksExecuter.command, expectedCommand) - sort.Strings(mocksExecuter.command) - sort.Strings(expectedCommand) - return reflect.DeepEqual(mocksExecuter.command, expectedCommand), nil + return isEqualStringLists(mocksExecuter.command, expectedCommand), nil } // Implements Execute function to mock/test Execute of FenceAgentsRemediationReconciler From 49362d3dd61cb0b6d26c6a68dcdb2cf49fd35022 Mon Sep 17 00:00:00 2001 From: razo7 Date: Tue, 25 Jul 2023 17:52:28 +0300 Subject: [PATCH 2/4] Verify SuccessFAResponse was received Check on Reconcile that stdout from running the FA equlas to SuccessFAResponse --- controllers/fenceagentsremediation_controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controllers/fenceagentsremediation_controller.go b/controllers/fenceagentsremediation_controller.go index 20dda578..fec1a54c 100644 --- a/controllers/fenceagentsremediation_controller.go +++ b/controllers/fenceagentsremediation_controller.go @@ -158,7 +158,7 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct r.Log.Error(err, "Fence Agent response was a failure", "CR's Name", req.Name) return emptyResult, err } - if outputErr != "" || outputRes == "" { + if outputErr != "" || outputRes != SuccessFAResponse { // response wasn't failure or sucesss message err := fmt.Errorf("unknown fence agent response - expecting `%s` response, but we received `%s`", SuccessFAResponse, outputRes) r.Log.Error(err, "Fence Agent response wasn't a success message", "CR's Name", req.Name) From a5f47611359a7fe2728ad73c5d55314dbe473593 Mon Sep 17 00:00:00 2001 From: razo7 Date: Tue, 25 Jul 2023 17:57:41 +0300 Subject: [PATCH 3/4] Add error prints in build-fa-parameters We log an error prior to every time that the function returns an error --- controllers/fenceagentsremediation_controller.go | 15 ++++++++------- test/e2e/far_e2e_test.go | 11 +++++------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/controllers/fenceagentsremediation_controller.go b/controllers/fenceagentsremediation_controller.go index fec1a54c..7e98d750 100644 --- a/controllers/fenceagentsremediation_controller.go +++ b/controllers/fenceagentsremediation_controller.go @@ -170,19 +170,19 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct // buildFenceAgentParams collects the FAR's parameters for the node based on FAR CR, and if the CR is missing parameters // or the CR's name don't match nodeParamter name then return an error func buildFenceAgentParams(far *v1alpha1.FenceAgentsRemediation) ([]string, error) { + logger := ctrl.Log.WithName("build-fa-parameters") if far.Spec.NodeParameters == nil || far.Spec.SharedParameters == nil { - return nil, errors.New(errorMissingParams) + err := errors.New(errorMissingParams) + logger.Error(err, "Missing parameters") + return nil, err } var fenceAgentParams []string - logger := ctrl.Log.WithName("build-fa-parameters") // add shared parameters except the action parameter for paramName, paramVal := range far.Spec.SharedParameters { - if paramName == parameterActionName { - if paramVal != parameterActionValue { - logger.Info("FAR doesn't support any other action than reboot", "action", paramVal) - } - } else { + if paramName != parameterActionName { fenceAgentParams = appendParamToSlice(fenceAgentParams, paramName, paramVal) + } else if paramVal != parameterActionValue { + logger.Info("FAR doesn't support any other action than reboot", "action", paramVal) } } // ensure the FA uses the reboot action @@ -195,6 +195,7 @@ func buildFenceAgentParams(far *v1alpha1.FenceAgentsRemediation) ([]string, erro fenceAgentParams = appendParamToSlice(fenceAgentParams, paramName, nodeVal) } else { err := errors.New(errorMissingNodeParams) + logger.Error(err, "Missing matching nodeParam and CR's name") return nil, err } } diff --git a/test/e2e/far_e2e_test.go b/test/e2e/far_e2e_test.go index c5344dfa..4093bd29 100644 --- a/test/e2e/far_e2e_test.go +++ b/test/e2e/far_e2e_test.go @@ -20,6 +20,7 @@ import ( configv1 "github.com/openshift/api/config/v1" "github.com/medik8s/fence-agents-remediation/api/v1alpha1" + "github.com/medik8s/fence-agents-remediation/controllers" "github.com/medik8s/fence-agents-remediation/pkg/utils" e2eUtils "github.com/medik8s/fence-agents-remediation/test/e2e/utils" ) @@ -30,7 +31,6 @@ const ( fenceAgentAction = "reboot" nodeIdentifierPrefixAWS = "--plug" nodeIdentifierPrefixIPMI = "--ipport" - succeesRebootMessage = "\"Success: Rebooted" containerName = "manager" //TODO: try to minimize timeout @@ -104,11 +104,10 @@ var _ = Describe("FAR E2e", func() { }) When("running FAR to reboot two nodes", func() { It("should successfully remediate the first node", func() { - checkRemediation(nodeName, succeesRebootMessage, nodeBootTimeBefore) + checkRemediation(nodeName, nodeBootTimeBefore) }) It("should successfully remediate the second node", func() { - checkRemediation(nodeName, succeesRebootMessage, nodeBootTimeBefore) - + checkRemediation(nodeName,nodeBootTimeBefore) }) }) }) @@ -293,7 +292,7 @@ func wasNodeRebooted(nodeName string, nodeBootTimeBefore time.Time) { } // checkRemediation verify whether the node was remediated -func checkRemediation(nodeName, logString string, nodeBootTimeBefore time.Time) { +func checkRemediation(nodeName string, nodeBootTimeBefore time.Time) { By("Check if FAR NoExecute taint was added") wasFarTaintAdded(nodeName) @@ -301,7 +300,7 @@ func checkRemediation(nodeName, logString string, nodeBootTimeBefore time.Time) // TODO: When reboot is running only once and it is running on FAR node, then FAR pod will // be recreated on a new node and since the FA command won't be exuected again, then the log // won't include any success message - checkFarLogs(succeesRebootMessage) + checkFarLogs(controllers.SuccessFAResponse) By("Getting new node's boot time") wasNodeRebooted(nodeName, nodeBootTimeBefore) From ab75e8a1f4b3b236b6b285c2f961dc5424265cec Mon Sep 17 00:00:00 2001 From: razo7 Date: Wed, 26 Jul 2023 14:54:03 +0300 Subject: [PATCH 4/4] Default FA action is reboot, and we error given any other action Limit the usage of FAs to reboot action only --- .../fenceagentsremediation_controller.go | 17 +++++++++++------ .../fenceagentsremediation_controller_test.go | 1 - test/e2e/far_e2e_test.go | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/controllers/fenceagentsremediation_controller.go b/controllers/fenceagentsremediation_controller.go index 7e98d750..d8d6538a 100644 --- a/controllers/fenceagentsremediation_controller.go +++ b/controllers/fenceagentsremediation_controller.go @@ -141,8 +141,8 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct r.Log.Info("Combine fence agent parameters", "Fence Agent", far.Spec.Agent, "Node Name", req.Name) faParams, err := buildFenceAgentParams(far) if err != nil { - r.Log.Error(err, "Invalid sharedParameters/nodeParameters from CR", "CR's Name", req.Name) - return emptyResult, err + r.Log.Error(err, "Invalid sharedParameters/nodeParameters from CR - edit/recreate the CR", "CR's Name", req.Name) + return emptyResult, nil } // Add medik8s remediation taint r.Log.Info("Add Medik8s remediation taint", "Fence Agent", far.Spec.Agent, "Node Name", req.Name) @@ -158,7 +158,7 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct r.Log.Error(err, "Fence Agent response was a failure", "CR's Name", req.Name) return emptyResult, err } - if outputErr != "" || outputRes != SuccessFAResponse { + if outputErr != "" || outputRes != SuccessFAResponse+"\n" { // response wasn't failure or sucesss message err := fmt.Errorf("unknown fence agent response - expecting `%s` response, but we received `%s`", SuccessFAResponse, outputRes) r.Log.Error(err, "Fence Agent response wasn't a success message", "CR's Name", req.Name) @@ -168,7 +168,7 @@ func (r *FenceAgentsRemediationReconciler) Reconcile(ctx context.Context, req ct } // buildFenceAgentParams collects the FAR's parameters for the node based on FAR CR, and if the CR is missing parameters -// or the CR's name don't match nodeParamter name then return an error +// or the CR's name don't match nodeParamter name or it has an action which is different than reboot, then return an error func buildFenceAgentParams(far *v1alpha1.FenceAgentsRemediation) ([]string, error) { logger := ctrl.Log.WithName("build-fa-parameters") if far.Spec.NodeParameters == nil || far.Spec.SharedParameters == nil { @@ -182,10 +182,15 @@ func buildFenceAgentParams(far *v1alpha1.FenceAgentsRemediation) ([]string, erro if paramName != parameterActionName { fenceAgentParams = appendParamToSlice(fenceAgentParams, paramName, paramVal) } else if paramVal != parameterActionValue { - logger.Info("FAR doesn't support any other action than reboot", "action", paramVal) + // --action attribute was selected but it is differemt than reboot + err := errors.New("FAR doesn't support any other action than reboot") + logger.Error(err, "can't build CR with this action attribute", "action", paramVal) + return nil, err } } - // ensure the FA uses the reboot action + // if --action attribute was not selected, then it's default value is reboot + // https://github.com/ClusterLabs/fence-agents/blob/main/lib/fencing.py.py#L103 + // Therefore we can safely add the reboot action regardless if it was initially added into the CR fenceAgentParams = appendParamToSlice(fenceAgentParams, parameterActionName, parameterActionValue) // append node parameters diff --git a/controllers/fenceagentsremediation_controller_test.go b/controllers/fenceagentsremediation_controller_test.go index 9038c1c0..bbb5026c 100644 --- a/controllers/fenceagentsremediation_controller_test.go +++ b/controllers/fenceagentsremediation_controller_test.go @@ -55,7 +55,6 @@ var _ = Describe("FAR Controller", func() { invalidShareParam := map[v1alpha1.ParameterName]string{ "--username": "admin", "--password": "password", - "--action": "", "--ip": "192.168.111.1", "--lanplus": "", } diff --git a/test/e2e/far_e2e_test.go b/test/e2e/far_e2e_test.go index 4093bd29..e53c2114 100644 --- a/test/e2e/far_e2e_test.go +++ b/test/e2e/far_e2e_test.go @@ -107,7 +107,7 @@ var _ = Describe("FAR E2e", func() { checkRemediation(nodeName, nodeBootTimeBefore) }) It("should successfully remediate the second node", func() { - checkRemediation(nodeName,nodeBootTimeBefore) + checkRemediation(nodeName, nodeBootTimeBefore) }) }) })