-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add RanDU soft reboot test case with helper functions
Add a test case which soft reboots the cluster nodes via systemctl reboot, waits for the node to recover and validates that the test applications starts without issues.
- Loading branch information
Showing
5 changed files
with
263 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
package reboot | ||
|
||
import ( | ||
"github.com/openshift-kni/eco-gosystem/tests/internal/cmd" | ||
) | ||
|
||
// SoftRebootNode executes systemctl reboot on a node. | ||
func SoftRebootNode(nodeName string) error { | ||
cmdToExec := []string{"chroot", "/rootfs", "systemctl", "reboot"} | ||
|
||
_, err := cmd.ExecCmd(cmdToExec, nodeName) | ||
|
||
if err != nil { | ||
return err | ||
} | ||
|
||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
package sriov | ||
|
||
import ( | ||
"encoding/json" | ||
|
||
"github.com/golang/glog" | ||
"github.com/openshift-kni/eco-goinfra/pkg/clients" | ||
"github.com/openshift-kni/eco-goinfra/pkg/sriov" | ||
metaV1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
) | ||
|
||
// NetworkInfo structure to store pod network information. | ||
type NetworkInfo struct { | ||
Name string `json:"name"` | ||
} | ||
|
||
// ListNetworksByDeviceType returns a list of sriov networks matching the policies | ||
// device type. | ||
func ListNetworksByDeviceType( | ||
apiClient *clients.Settings, | ||
deviceType string, | ||
) ([]string, error) { | ||
var devNetworks []string | ||
|
||
operatornsname := "openshift-sriov-network-operator" | ||
options := metaV1.ListOptions{} | ||
sriovPolicies, err := sriov.ListPolicy(apiClient, operatornsname, options) | ||
|
||
if err != nil { | ||
glog.V(100).Infof("Failed to list sriov policies in namespace: %s", operatornsname) | ||
|
||
return nil, err | ||
} | ||
|
||
sriovNetworks, err := sriov.List(apiClient, operatornsname, options) | ||
|
||
if err != nil { | ||
glog.V(100).Infof("Failed to list sriov networks in namespace: %s", operatornsname) | ||
|
||
return nil, err | ||
} | ||
|
||
for _, policy := range sriovPolicies { | ||
if policy.Definition.Spec.DeviceType == deviceType { | ||
for _, network := range sriovNetworks { | ||
if policy.Definition.Spec.ResourceName == network.Definition.Spec.ResourceName { | ||
devNetworks = append(devNetworks, network.Definition.Name) | ||
} | ||
} | ||
} | ||
} | ||
|
||
return devNetworks, nil | ||
} | ||
|
||
// ExtractNetworkNames returns the name of the networks based on the pods | ||
// network status annotations. | ||
func ExtractNetworkNames(jsonData string) ([]string, error) { | ||
var networkInfo []NetworkInfo | ||
|
||
// Unmarshal the JSON data into the networkInfo slice. | ||
err := json.Unmarshal([]byte(jsonData), &networkInfo) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
// Extract the interface names into a separate slice. | ||
var networkNames []string | ||
|
||
for _, info := range networkInfo { | ||
if info.Name != "ovn-kubernetes" { | ||
networkNames = append(networkNames, info.Name) | ||
} | ||
} | ||
|
||
return networkNames, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
package ran_du_system_test | ||
|
||
import ( | ||
"fmt" | ||
"strconv" | ||
"strings" | ||
"time" | ||
|
||
. "github.com/onsi/ginkgo/v2" | ||
. "github.com/onsi/gomega" | ||
"github.com/openshift-kni/eco-goinfra/pkg/deployment" | ||
"github.com/openshift-kni/eco-goinfra/pkg/namespace" | ||
"github.com/openshift-kni/eco-goinfra/pkg/nodes" | ||
"github.com/openshift-kni/eco-goinfra/pkg/pod" | ||
"github.com/openshift-kni/eco-goinfra/pkg/polarion" | ||
"github.com/openshift-kni/eco-gosystem/tests/internal/await" | ||
"github.com/openshift-kni/eco-gosystem/tests/internal/reboot" | ||
"github.com/openshift-kni/eco-gosystem/tests/internal/shell" | ||
"github.com/openshift-kni/eco-gosystem/tests/internal/sriov" | ||
. "github.com/openshift-kni/eco-gosystem/tests/ran-du/internal/randuinittools" | ||
"github.com/openshift-kni/eco-gosystem/tests/ran-du/internal/randuparams" | ||
"github.com/openshift-kni/eco-gosystem/tests/ran-du/internal/randutestworkload" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
) | ||
|
||
var _ = Describe( | ||
"SoftReboot", | ||
Ordered, | ||
ContinueOnFailure, | ||
Label("SoftReboot"), func() { | ||
BeforeAll(func() { | ||
By("Preparing workload") | ||
|
||
if namespace.NewBuilder(APIClient, RanDuTestConfig.TestWorkload.Namespace).Exists() { | ||
err := randutestworkload.CleanNameSpace(randuparams.DefaultTimeout, RanDuTestConfig.TestWorkload.Namespace) | ||
Expect(err).ToNot(HaveOccurred(), "Failed to clean workload test namespace objects") | ||
} | ||
|
||
if RanDuTestConfig.TestWorkload.CreateMethod == "shell" { | ||
By("Launching workload using shell method") | ||
_, err := shell.ExecuteCmd(RanDuTestConfig.TestWorkload.CreateShellCmd) | ||
Expect(err).ToNot(HaveOccurred(), "Failed to launch workload") | ||
} | ||
|
||
By("Waiting for deployment replicas to become ready") | ||
_, err := await.WaitUntilAllDeploymentsReady(APIClient, RanDuTestConfig.TestWorkload.Namespace, | ||
randuparams.DefaultTimeout) | ||
Expect(err).ToNot(HaveOccurred(), "error while waiting for deployment to become ready") | ||
|
||
By("Waiting for statefulset replicas to become ready") | ||
_, err = await.WaitUntilAllStatefulSetsReady(APIClient, RanDuTestConfig.TestWorkload.Namespace, | ||
randuparams.DefaultTimeout) | ||
Expect(err).ToNot(HaveOccurred(), "error while waiting for statefulsets to become ready") | ||
|
||
}) | ||
It("Soft reboot nodes", polarion.ID("42738"), Label("SoftReboot"), func() { | ||
By("Retrieve nodes list") | ||
nodeList, err := nodes.List( | ||
APIClient, | ||
metav1.ListOptions{}, | ||
) | ||
Expect(err).ToNot(HaveOccurred(), "Error listing nodes.") | ||
|
||
By("Pull openshift-apiserver deployment spec") | ||
deploy, err := deployment.Pull(APIClient, "apiserver", "openshift-apiserver") | ||
Expect(err).ToNot(HaveOccurred(), "error while pulling openshift apiserver deployment") | ||
|
||
rebootIterations, err := strconv.Atoi(RanDuTestConfig.SoftRebootIterations) | ||
if err != nil { | ||
fmt.Println(err) | ||
} | ||
|
||
for r := 0; r < rebootIterations; r++ { | ||
By("Soft rebooting cluster") | ||
fmt.Printf("Soft reboot iteration no. %d\n", r) | ||
for _, node := range nodeList { | ||
By("Reboot node") | ||
fmt.Printf("Reboot node %s", node.Definition.Name) | ||
err = reboot.SoftRebootNode(node.Definition.Name) | ||
Expect(err).ToNot(HaveOccurred(), "Error rebooting the nodes.") | ||
|
||
By("Wait for node to become unreachable") | ||
fmt.Printf("Wait for node %s to become unreachable", node.Definition.Name) | ||
err = await.WaitUntilNodeIsUnreachable(node.Definition.Name, 3*time.Minute) | ||
Expect(err).ToNot(HaveOccurred(), "Node is still reachable: %s", err) | ||
|
||
By("Wait for the openshift apiserver deployment to be available") | ||
err = deploy.WaitUntilCondition("Available", 5*time.Minute) | ||
Expect(err).ToNot(HaveOccurred(), "openshift apiserver deployment has not recovered in time after reboot") | ||
|
||
By("Wait for two more minutes for the cluster resources to reconciliate their state") | ||
time.Sleep(2 * time.Minute) | ||
|
||
By("Remove any pods in UnexpectedAdmissionError state") | ||
listOptions := metav1.ListOptions{ | ||
FieldSelector: "status.phase=Failed", | ||
} | ||
podsList, err := pod.List(APIClient, RanDuTestConfig.TestWorkload.Namespace, listOptions) | ||
Expect(err).ToNot(HaveOccurred(), "could not retrieve pod list") | ||
|
||
for _, failedPod := range podsList { | ||
if failedPod.Definition.Status.Reason == "UnexpectedAdmissionError" { | ||
_, err := failedPod.DeleteAndWait(60 * time.Second) | ||
Expect(err).ToNot(HaveOccurred(), "could not delete pod in UnexpectedAdmissionError state") | ||
} | ||
} | ||
|
||
By("Waiting for deployment replicas to become ready") | ||
_, err = await.WaitUntilAllDeploymentsReady(APIClient, RanDuTestConfig.TestWorkload.Namespace, | ||
randuparams.DefaultTimeout) | ||
Expect(err).ToNot(HaveOccurred(), "error while waiting for deployment to become ready") | ||
|
||
By("Waiting for statefulset replicas to become ready") | ||
_, err = await.WaitUntilAllStatefulSetsReady(APIClient, RanDuTestConfig.TestWorkload.Namespace, | ||
randuparams.DefaultTimeout) | ||
Expect(err).ToNot(HaveOccurred(), "error while waiting for statefulsets to become ready") | ||
|
||
By("Retrieve pod list") | ||
podsList, err = pod.List(APIClient, RanDuTestConfig.TestWorkload.Namespace, metav1.ListOptions{}) | ||
Expect(err).ToNot(HaveOccurred(), "could not retrieve pod list") | ||
|
||
By("Retrieve sriov networks with vfio-pci driver") | ||
vfioNetworks, err := sriov.ListNetworksByDeviceType(APIClient, "vfio-pci") | ||
Expect(err).ToNot(HaveOccurred(), "error when retrieving sriov network using vfio-pci driver") | ||
|
||
By("Assert devices under /dev/vfio on pod are equal or more to the pods vfio-pci network attachments\n") | ||
for _, pod := range podsList { | ||
networkNames, err := sriov.ExtractNetworkNames(pod.Object.Annotations["k8s.v1.cni.cncf.io/network-status"]) | ||
Expect(err).ToNot(HaveOccurred(), "error when retrieving pod network attachments") | ||
|
||
podvfioDevices := 0 | ||
|
||
for _, vfioNet := range vfioNetworks { | ||
for _, podNet := range networkNames { | ||
if strings.Contains(podNet, pod.Definition.Namespace+"/"+vfioNet) { | ||
podvfioDevices++ | ||
} | ||
} | ||
} | ||
|
||
if podvfioDevices > 0 { | ||
fmt.Printf("Check /dev/vfio on pod %s\n", pod.Definition.Name) | ||
lscmd := []string{"ls", "--color=never", "/dev/vfio"} | ||
cmd, err := pod.ExecCommand(lscmd) | ||
Expect(err).ToNot(HaveOccurred(), "error when executing command on pod") | ||
|
||
// retry in case the command exec returns an empty string | ||
if len(cmd.String()) == 0 { | ||
cmd, err = pod.ExecCommand(lscmd) | ||
Expect(err).ToNot(HaveOccurred(), "error when executing command on pod") | ||
} | ||
|
||
vfioDevls := strings.Fields(strings.ReplaceAll(cmd.String(), "vfio", "")) | ||
Expect(len(vfioDevls)).To(BeNumerically(">=", podvfioDevices), | ||
"error: vfio devices inside pod( %s ) do not match pod %s attachments:", cmd.String(), pod.Definition.Name) | ||
} | ||
} | ||
} | ||
} | ||
}) | ||
AfterAll(func() { | ||
By("Cleaning up test workload resources") | ||
err := randutestworkload.CleanNameSpace(randuparams.DefaultTimeout, RanDuTestConfig.TestWorkload.Namespace) | ||
Expect(err).ToNot(HaveOccurred(), "Failed to clean workload test namespace objects") | ||
}) | ||
}) |