Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test(e2e-cilium): add e2e test for cilium event observer plugin #665

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
# Test binary, built with `go test -c`
*.test

*.pem

# Output of the go coverage tool, specifically when used with LiteIDE
*.out

Expand All @@ -35,6 +37,8 @@ output

dist/
bin/
.certs/
site/build/

image-metadata-*.json
*packetmonitorsupport*/
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,10 @@ test: $(ENVTEST) # Run unit tests.
go build -o test-summary ./test/utsummary/main.go
CGO_ENABLED=0 KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use -p path)" go test -tags=unit,dashboard -skip=TestE2E* -coverprofile=coverage.out -v -json ./... | ./test-summary --progress --verbose

# E2E_TEST_PATH selects the Go files handed to `go test` by the test-e2e target.
# It is assigned with ?=, so a value supplied by the caller takes precedence over
# the default of every Go file directly under ./test/e2e.
E2E_TEST_PATH ?= ./test/e2e/*.go
test-e2e: $(ENVTEST) # Run e2e tests.
CGO_ENABLED=0 go test -v $(E2E_TEST_PATH) -timeout 30m -tags=e2e -count=1 -args -image-tag=$(IMAGE_TAG) -image-registry=acnpublic.azurecr.io -image-namespace=microsoft/retina

coverage: # Code coverage.
# go generate ./... && go test -tags=unit -coverprofile=coverage.out.tmp ./...
cat coverage.out | grep -v "_bpf.go\|_bpfel_x86.go\|_bpfel_arm64.go|_generated.go|mock_" | grep -v mock > coveragenew.out
Expand Down
14 changes: 13 additions & 1 deletion test/e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,19 @@
## Starter Example

When authoring tests, make sure to prefix the test name with `TestE2E` so that it is skipped by existing pipeline unit test framework.
For reference, see the `test-all` recipe in the root [Makefile](../../Makefile).
For reference, see the `test-e2e` recipe in the root [Makefile](../../Makefile).

## Running E2E Test

You can execute all e2e tests in an AKS cluster. The image tag should be the image tag from the *Build Agent Images* pipeline on your PR.
You can also execute specific e2e tests (non-cilium and cilium) by setting `E2E_TEST_PATH`. If it is not set, it defaults to `./test/e2e/*.go`, which runs all tests.
Example command:

```bash
export AZURE_SUBSCRIPTION_ID=<YOUR-SUBSCRIPTION> && \
export AZURE_LOCATION=<location> && \
IMAGE_TAG=<YOUR-IMAGE-TAG> make test-e2e
```

For a sample test, please check out:
[the Retina E2E.](./scenarios/retina/drop/scenario.go)
109 changes: 109 additions & 0 deletions test/e2e/framework/azure/create-cluster-with-cilium.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package azure

import (
"context"
"fmt"
"log"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
armcontainerservice "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4"
)

// CreateCiliumCluster is an e2e step that creates an AKS cluster running the
// Cilium network dataplane.
type CreateCiliumCluster struct {
	// SubscriptionID is the Azure subscription used to build the ARM client.
	SubscriptionID string
	// ResourceGroupName is the resource group the cluster is created in.
	ResourceGroupName string
	// Location is handed to the starter cluster template.
	Location string
	// ClusterName is the name of the managed cluster to create.
	ClusterName string
}

// Prevalidate performs no checks for this step and always returns nil.
func (c *CreateCiliumCluster) Prevalidate() error {
	return nil
}

// Stop has nothing to tear down for this step and always returns nil.
func (c *CreateCiliumCluster) Stop() error {
	return nil
}

// Run creates the AKS cluster described by c and blocks until the create
// operation finishes or the clusterTimeout deadline expires.
//
// The cluster definition starts from GetStarterClusterTemplate and is then
// switched to Azure CNI in overlay mode with the Cilium dataplane and both
// IPv4 and IPv6 families, gets an additional "arm64" user node pool, and has
// its node OS upgrade channel set to NodeImage. While waiting, a progress line
// is logged on every clusterCreateTicker tick.
//
// It returns a wrapped error if credentials or the client cannot be obtained,
// if the create request is rejected, if polling fails, or if the context
// expires first.
func (c *CreateCiliumCluster) Run() error {
	// Start with default cluster template
	cluster := GetStarterClusterTemplate(c.Location)

	// Networking: Azure CNI overlay on the Cilium dataplane, dual-stack.
	network := cluster.Properties.NetworkProfile
	network.NetworkPlugin = to.Ptr(armcontainerservice.NetworkPluginAzure)
	network.NetworkPluginMode = to.Ptr(armcontainerservice.NetworkPluginModeOverlay)
	network.NetworkDataplane = to.Ptr(armcontainerservice.NetworkDataplaneCilium)
	network.IPFamilies = []*armcontainerservice.IPFamily{
		to.Ptr(armcontainerservice.IPFamilyIPv4),
		to.Ptr(armcontainerservice.IPFamilyIPv6),
	}

	// Extra user-mode Linux pool named "arm64", kept separate for verbosity.
	armPool := &armcontainerservice.ManagedClusterAgentPoolProfile{ //nolint:all
		Type: to.Ptr(armcontainerservice.AgentPoolTypeVirtualMachineScaleSets),
		// AvailabilityZones: []*string{to.Ptr("1")},
		Count:              to.Ptr[int32](AuxilaryNodeCount),
		EnableNodePublicIP: to.Ptr(false),
		Mode:               to.Ptr(armcontainerservice.AgentPoolModeUser),
		OSType:             to.Ptr(armcontainerservice.OSTypeLinux),
		ScaleDownMode:      to.Ptr(armcontainerservice.ScaleDownModeDelete),
		VMSize:             to.Ptr(AgentARMSKU),
		Name:               to.Ptr("arm64"),
		MaxPods:            to.Ptr(int32(MaxPodsPerNode)),
	}
	cluster.Properties.AgentPoolProfiles = append(cluster.Properties.AgentPoolProfiles, armPool)

	cluster.Properties.AutoUpgradeProfile = &armcontainerservice.ManagedClusterAutoUpgradeProfile{
		NodeOSUpgradeChannel: to.Ptr(armcontainerservice.NodeOSUpgradeChannelNodeImage),
	}

	// Deploy cluster
	cred, err := azidentity.NewAzureCLICredential(nil)
	if err != nil {
		return fmt.Errorf("failed to obtain a credential: %w", err)
	}
	ctx, cancel := context.WithTimeout(context.Background(), clusterTimeout)
	defer cancel()

	clientFactory, err := armcontainerservice.NewClientFactory(c.SubscriptionID, cred, nil)
	if err != nil {
		return fmt.Errorf("failed to create az client: %w", err)
	}

	log.Printf("when the cluster is ready, use the below command to access and debug")
	log.Printf("az aks get-credentials --resource-group %s --name %s --subscription %s", c.ResourceGroupName, c.ClusterName, c.SubscriptionID)
	log.Printf("creating cluster \"%s\" in resource group \"%s\"...", c.ClusterName, c.ResourceGroupName)

	clustersClient := clientFactory.NewManagedClustersClient()
	poller, err := clustersClient.BeginCreateOrUpdate(ctx, c.ResourceGroupName, c.ClusterName, cluster, nil)
	if err != nil {
		return fmt.Errorf("failed to finish the create cluster request: %w", err)
	}

	// Poll in the background and hand the outcome back over a channel. The
	// buffer of one lets the goroutine exit even when this function has
	// already returned because the context expired.
	pollResult := make(chan error, 1)
	go func() {
		_, pollErr := poller.PollUntilDone(ctx, &runtime.PollUntilDoneOptions{
			Frequency: pollFrequency,
		})
		if pollErr == nil {
			log.Printf("cluster %s is ready\n", c.ClusterName)
		} else {
			log.Printf("failed to create cluster: %v\n", pollErr)
		}
		pollResult <- pollErr
	}()

	progress := time.NewTicker(clusterCreateTicker)
	defer progress.Stop()
	for {
		select {
		case <-ctx.Done():
			return fmt.Errorf("failed to create cluster: %w", ctx.Err())
		case <-progress.C:
			log.Printf("waiting for cluster %s to be ready...\n", c.ClusterName)
		case pollErr := <-pollResult:
			if pollErr != nil {
				return fmt.Errorf("received notification, failed to create cluster: %w", pollErr)
			}
			return nil
		}
	}
}
49 changes: 49 additions & 0 deletions test/e2e/jobs/jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"github.com/microsoft/retina/test/e2e/framework/generic"
"github.com/microsoft/retina/test/e2e/framework/kubernetes"
"github.com/microsoft/retina/test/e2e/framework/types"
"github.com/microsoft/retina/test/e2e/scenarios/ciliumeventobserver"
"github.com/microsoft/retina/test/e2e/scenarios/dns"
"github.com/microsoft/retina/test/e2e/scenarios/drop"
"github.com/microsoft/retina/test/e2e/scenarios/latency"
Expand Down Expand Up @@ -50,6 +51,32 @@ func CreateTestInfra(subID, clusterName, location, kubeConfigFilePath string) *t
return job
}

// CreateTestInfraCilium returns a job that provisions the e2e infrastructure
// on a Cilium dataplane: it creates a resource group (named after the
// cluster), creates the Cilium AKS cluster, fetches its kubeconfig into
// kubeConfigFilePath, and loads the image tag/namespace/registry flags.
func CreateTestInfraCilium(subID, clusterName, location, kubeConfigFilePath string) *types.Job {
	job := types.NewJob("Create e2e test infrastructure on cilium dataplane")

	resourceGroup := &azure.CreateResourceGroup{
		SubscriptionID:    subID,
		ResourceGroupName: clusterName,
		Location:          location,
	}
	job.AddStep(resourceGroup, nil)

	// NOTE(review): only ClusterName is set on this step; the remaining
	// fields are presumably filled in by the job framework from earlier
	// steps — confirm.
	ciliumCluster := &azure.CreateCiliumCluster{
		ClusterName: clusterName,
	}
	job.AddStep(ciliumCluster, nil)

	kubeConfig := &azure.GetAKSKubeConfig{
		KubeConfigFilePath: kubeConfigFilePath,
	}
	job.AddStep(kubeConfig, nil)

	imageFlags := &generic.LoadFlags{
		TagEnv:            generic.DefaultTagEnv,
		ImageNamespaceEnv: generic.DefaultImageNamespace,
		ImageRegistryEnv:  generic.DefaultImageRegistry,
	}
	job.AddStep(imageFlags, nil)

	return job
}

func DeleteTestInfra(subID, clusterName, location string) *types.Job {
job := types.NewJob("Delete e2e test infrastructure")

Expand Down Expand Up @@ -186,3 +213,25 @@ func UpgradeAndTestRetinaAdvancedMetrics(kubeConfigFilePath, chartPath, valuesFi

return job
}

// InstallAndTestRetinaCiliumMetrics returns a job that installs the Retina
// Helm chart from chartPath as release "retina" in kube-system, upgrades it
// using valuesFilePath, and then runs the ciliumeventobserver drop-metric and
// flows/TCP-metric validation scenarios.
func InstallAndTestRetinaCiliumMetrics(kubeConfigFilePath, chartPath, valuesFilePath string) *types.Job {
	job := types.NewJob("Install and test Retina with Cilium metrics")

	install := &kubernetes.InstallHelmChart{
		Namespace:          "kube-system",
		ReleaseName:        "retina",
		KubeConfigFilePath: kubeConfigFilePath,
		ChartPath:          chartPath,
		TagEnv:             generic.DefaultTagEnv,
	}
	job.AddStep(install, nil)

	// Upgrade to ciliumeventobserver plugin
	upgrade := &kubernetes.UpgradeRetinaHelmChart{
		ValuesFile: valuesFilePath,
	}
	job.AddStep(upgrade, nil)

	dropMetric := ciliumeventobserver.ValidateCiliumEventObserverDropMetric()
	job.AddScenario(dropMetric)

	flowsAndTCP := ciliumeventobserver.ValidateCiliumEventObserverFlowsAndTCPMetrics()
	job.AddScenario(flowsAndTCP)

	return job
}
64 changes: 64 additions & 0 deletions test/e2e/retina_cilium_e2e_test.go
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like this will run sequentially, can we make it run in parallel, preferably in another gh workflow stage so we can see output separately for both clusters.

You might want to add another one of these

Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package retina

import (
"crypto/rand"
"math/big"
"os"
"os/user"
"path/filepath"
"strconv"
"testing"
"time"

"github.com/microsoft/retina/test/e2e/common"
"github.com/microsoft/retina/test/e2e/framework/types"
jobs "github.com/microsoft/retina/test/e2e/jobs"
"github.com/stretchr/testify/require"
)

// retinaCiliumLocations lists the Azure regions the Cilium e2e test picks
// from at random when AZURE_LOCATION is not set.
var retinaCiliumLocations = []string{
	"eastus2",
	"centralus",
	"southcentralus",
	"uksouth",
	"centralindia",
	"westus2",
}

// TestE2ERetinaCilium tests all e2e scenarios for retina on cilium clusters.
//
// It requires AZURE_SUBSCRIPTION_ID to be set. AZURE_LOCATION is optional;
// when empty, a region is chosen at random from retinaCiliumLocations. The
// test creates the Cilium test infrastructure, installs Retina from the
// hubble Helm chart with the cilium values profile, runs the Cilium metric
// scenarios, and deletes the infrastructure on the way out.
func TestE2ERetinaCilium(t *testing.T) {
	curuser, err := user.Current()
	require.NoError(t, err)
	// The Unix timestamp suffix keeps cluster names distinct between runs.
	clusterName := curuser.Username + common.NetObsRGtag + "cil-" + strconv.FormatInt(time.Now().Unix(), 10)

	subID := os.Getenv("AZURE_SUBSCRIPTION_ID")
	require.NotEmpty(t, subID)

	location := os.Getenv("AZURE_LOCATION")
	if location == "" {
		var nBig *big.Int
		nBig, err = rand.Int(rand.Reader, big.NewInt(int64(len(retinaCiliumLocations))))
		if err != nil {
			t.Fatalf("Failed to generate a secure random index: %v", err)
		}
		location = retinaCiliumLocations[nBig.Int64()]
	}

	cwd, err := os.Getwd()
	require.NoError(t, err)

	// Get to root of the repo by going up two directories
	rootDir := filepath.Dir(filepath.Dir(cwd))

	chartPath := filepath.Join(rootDir, "deploy", "hubble", "manifests", "controller", "helm", "retina")
	profilePath := filepath.Join(rootDir, "test", "profiles", "advanced", "hubble", "cilium", "values.yaml")
	kubeConfigFilePath := filepath.Join(rootDir, "test", "e2e", "test.pem")

	// CreateTestInfra with Cilium dataplane
	createTestInfra := types.NewRunner(t, jobs.CreateTestInfraCilium(subID, clusterName, location, kubeConfigFilePath))
	createTestInfra.Run()

	// Hacky way to ensure that the test infra is deleted even if the test panics
	defer func() {
		if r := recover(); r != nil {
			// A recovered panic would otherwise let the test be reported as
			// passing, so record it as a failure.
			t.Errorf("Recovered in TestE2ERetinaCilium, %v", r)
		}
		// Teardown stays best-effort: a failed delete is reported but does
		// not change the test result.
		if delErr := jobs.DeleteTestInfra(subID, clusterName, location).Run(); delErr != nil {
			t.Logf("failed to delete test infra for %q: %v", clusterName, delErr)
		}
	}()

	advanceMetricsE2E := types.NewRunner(t, jobs.InstallAndTestRetinaCiliumMetrics(kubeConfigFilePath, chartPath, profilePath))
	advanceMetricsE2E.Run()
}
Loading
Loading