Skip to content

Commit

Permalink
scale experiment
Browse files Browse the repository at this point in the history
  • Loading branch information
sbueringer committed Jun 13, 2023
1 parent b47b77b commit 795eaa3
Show file tree
Hide file tree
Showing 23 changed files with 286 additions and 135 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ hack/tools/bin

# E2E test templates
test/e2e/data/infrastructure-docker/**/cluster-template*.yaml
test/e2e/data/infrastructure-inmemory/**/cluster-template*.yaml

# Output of Makefile targets using sed on MacOS systems
*.yaml-e
Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,7 @@ generate-modules: ## Run go mod tidy to ensure modules are up to date
generate-e2e-templates: $(KUSTOMIZE) $(addprefix generate-e2e-templates-, v0.4 v1.0 v1.3 v1.4 main) ## Generate cluster templates for all versions

DOCKER_TEMPLATES := test/e2e/data/infrastructure-docker
GOOFY_TEMPLATES := test/e2e/data/infrastructure-inmemory

.PHONY: generate-e2e-templates-v0.4
generate-e2e-templates-v0.4: $(KUSTOMIZE)
Expand Down Expand Up @@ -546,6 +547,8 @@ generate-e2e-templates-main: $(KUSTOMIZE)
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-topology --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-topology.yaml
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-ignition --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-ignition.yaml

$(KUSTOMIZE) build $(GOOFY_TEMPLATES)/main/cluster-template --load-restrictor LoadRestrictionsNone > $(GOOFY_TEMPLATES)/main/cluster-template.yaml

.PHONY: generate-metrics-config
generate-metrics-config: $(ENVSUBST_BIN) ## Generate ./hack/observability/kube-state-metrics/crd-config.yaml
OUTPUT_FILE="${OBSERVABILITY_DIR}/kube-state-metrics/crd-config.yaml"; \
Expand Down
3 changes: 3 additions & 0 deletions bootstrap/kubeadm/config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ spec:
- "--bootstrap-token-ttl=${KUBEADM_BOOTSTRAP_TOKEN_TTL:=15m}"
image: controller:latest
name: manager
env:
- name: GOMAXPROCS
value: "16"
ports:
- containerPort: 9440
name: healthz
Expand Down
8 changes: 6 additions & 2 deletions bootstrap/kubeadm/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"fmt"
_ "net/http/pprof"
"os"
goruntime "runtime"
"time"

// +kubebuilder:scaffold:imports
Expand Down Expand Up @@ -128,10 +129,10 @@ func InitFlags(fs *pflag.FlagSet) {
fs.DurationVar(&syncPeriod, "sync-period", 10*time.Minute,
"The minimum interval at which watched resources are reconciled (e.g. 15m)")

fs.Float32Var(&restConfigQPS, "kube-api-qps", 20,
fs.Float32Var(&restConfigQPS, "kube-api-qps", 100,
"Maximum queries per second from the controller client to the Kubernetes API server. Defaults to 20")

fs.IntVar(&restConfigBurst, "kube-api-burst", 30,
fs.IntVar(&restConfigBurst, "kube-api-burst", 200,
"Maximum number of queries that should be allowed in one burst from the controller client to the Kubernetes API server. Default 30")

fs.DurationVar(&tokenTTL, "bootstrap-token-ttl", kubeadmbootstrapcontrollers.DefaultTokenTTL,
Expand All @@ -155,6 +156,9 @@ func InitFlags(fs *pflag.FlagSet) {
}

func main() {
// Sample on average one blocking event per millisecond spent blocked: events blocked
// for >= time.Millisecond are always recorded, shorter ones are sampled proportionally.
goruntime.SetBlockProfileRate(int(time.Millisecond))

InitFlags(pflag.CommandLine)
pflag.CommandLine.SetNormalizeFunc(cliflag.WordSepNormalizeFunc)
pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
Expand Down
2 changes: 2 additions & 0 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.uid
- name: GOMAXPROCS
value: "16"
ports:
- containerPort: 9440
name: healthz
Expand Down
2 changes: 2 additions & 0 deletions controlplane/kubeadm/config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.uid
- name: GOMAXPROCS
value: "16"
ports:
- containerPort: 9440
name: healthz
Expand Down
8 changes: 6 additions & 2 deletions controlplane/kubeadm/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"fmt"
_ "net/http/pprof"
"os"
goruntime "runtime"
"time"

// +kubebuilder:scaffold:imports
Expand Down Expand Up @@ -130,10 +131,10 @@ func InitFlags(fs *pflag.FlagSet) {
fs.DurationVar(&syncPeriod, "sync-period", 10*time.Minute,
"The minimum interval at which watched resources are reconciled (e.g. 15m)")

fs.Float32Var(&restConfigQPS, "kube-api-qps", 20,
fs.Float32Var(&restConfigQPS, "kube-api-qps", 100,
"Maximum queries per second from the controller client to the Kubernetes API server. Defaults to 20")

fs.IntVar(&restConfigBurst, "kube-api-burst", 30,
fs.IntVar(&restConfigBurst, "kube-api-burst", 200,
"Maximum number of queries that should be allowed in one burst from the controller client to the Kubernetes API server. Default 30")

fs.StringVar(&watchFilterValue, "watch-filter", "",
Expand All @@ -159,6 +160,9 @@ func InitFlags(fs *pflag.FlagSet) {
feature.MutableGates.AddFlag(fs)
}
func main() {
// Sample on average one blocking event per millisecond spent blocked: events blocked
// for >= time.Millisecond are always recorded, shorter ones are sampled proportionally.
goruntime.SetBlockProfileRate(int(time.Millisecond))

InitFlags(pflag.CommandLine)
pflag.CommandLine.SetNormalizeFunc(cliflag.WordSepNormalizeFunc)
pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
Expand Down
6 changes: 3 additions & 3 deletions hack/observability/grafana/dashboards/cluster-api.json
Original file line number Diff line number Diff line change
Expand Up @@ -3399,7 +3399,7 @@
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "sum(capi_machine_info{control_plane_name=\"\"} * on(name) group_right () (capi_machine_status_condition{status=\"True\"} == 1)) by (type)",
"expr": "sum(avg(capi_machine_info{control_plane_name=\"\"}) by (name) * on(name) group_right () (capi_machine_status_condition{status=\"True\"} == 1)) by (type)",
"hide": false,
"legendFormat": "{{type}}",
"range": true,
Expand All @@ -3411,7 +3411,7 @@
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "sum(capi_machine_info{control_plane_name=\"\"} * on(name) group_right () (capi_machine_status_condition{status=\"False\"} == 1)) by (type)",
"expr": "sum(avg(capi_machine_info{control_plane_name=\"\"}) by (name) * on(name) group_right () (capi_machine_status_condition{status=\"False\"} == 1)) by (type)",
"hide": false,
"legendFormat": "{{type}} (False)",
"range": true,
Expand Down Expand Up @@ -3594,7 +3594,7 @@
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "sum(capi_machine_info{control_plane_name=\"\"} * on (name) group_right () (capi_machine_status_phase == 1)) by (phase)",
"expr": "sum(avg(capi_machine_info{control_plane_name=\"\"}) by (name) * on (name) group_right () (capi_machine_status_phase == 1)) by (phase)",
"legendFormat": "{{phase}}",
"range": true,
"refId": "A"
Expand Down
95 changes: 1 addition & 94 deletions hack/observability/grafana/dashboards/controller-runtime.json
Original file line number Diff line number Diff line change
Expand Up @@ -1815,7 +1815,7 @@
},
"gridPos": {
"h": 8,
"w": 12,
"w": 24,
"x": 0,
"y": 11
},
Expand Down Expand Up @@ -1848,99 +1848,6 @@
"title": "Reconcile Duration",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 11
},
"id": 28,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "sum(rate(controller_runtime_reconcile_time_seconds_sum{pod=~\"$Pod\",controller=~\"$Controller\"}[5m])) by (pod,controller) / sum(rate(controller_runtime_reconcile_time_seconds_count{pod=~\"$Pod\",controller=~\"$Controller\"}[5m])) by (pod,controller)",
"legendFormat": "{{pod}}: {{controller}}",
"range": true,
"refId": "A"
}
],
"title": "Reconcile Duration by Controller",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
Expand Down
8 changes: 8 additions & 0 deletions internal/controllers/machine/machine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package machine
import (
"context"
"fmt"
"runtime/trace"
"time"

"github.com/pkg/errors"
Expand Down Expand Up @@ -138,6 +139,9 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt
}

func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
ctx, task := trace.NewTask(ctx, "Machine.Reconcile")
defer task.End()

// Fetch the Machine instance
m := &clusterv1.Machine{}
if err := r.Client.Get(ctx, req.NamespacedName, m); err != nil {
Expand Down Expand Up @@ -288,6 +292,10 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,

res := ctrl.Result{}
errs := []error{}

ctx, task := trace.NewTask(ctx, "Machine.phases")
defer task.End()

for _, phase := range phases {
// Call the inner reconciliation methods.
phaseResult, err := phase(ctx, cluster, m)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package machine

import (
"context"
"runtime/trace"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
Expand All @@ -32,21 +33,28 @@ import (
)

func (r *Reconciler) reconcileInterruptibleNodeLabel(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine) (ctrl.Result, error) {
ctx, task := trace.NewTask(ctx, "Machine.phases.reconcileInterruptibleNodeLabel")
defer task.End()

// Check that the Machine hasn't been deleted or in the process
// and that the Machine has a NodeRef.
if !machine.DeletionTimestamp.IsZero() || machine.Status.NodeRef == nil {
return ctrl.Result{}, nil
}

// Get the infrastructure object
ctx, task = trace.NewTask(ctx, "Machine.phases.reconcileInterruptibleNodeLabel.external.Get")
infra, err := external.Get(ctx, r.Client, &machine.Spec.InfrastructureRef, machine.Namespace)
if err != nil {
return ctrl.Result{}, err
}
task.End()

log := ctrl.LoggerFrom(ctx)

// Get interruptible instance status from the infrastructure provider.
ctx, task = trace.NewTask(ctx, "Machine.phases.reconcileInterruptibleNodeLabel.status.interruptible")
defer task.End()
interruptible, _, err := unstructured.NestedBool(infra.Object, "status", "interruptible")
if err != nil {
log.V(1).Error(err, "Failed to get interruptible status from infrastructure provider", "Machine", klog.KObj(machine))
Expand Down
4 changes: 4 additions & 0 deletions internal/controllers/machine/machine_controller_noderef.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package machine
import (
"context"
"fmt"
"runtime/trace"
"strings"

"github.com/pkg/errors"
Expand All @@ -42,6 +43,9 @@ var (
)

func (r *Reconciler) reconcileNode(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine) (ctrl.Result, error) {
ctx, task := trace.NewTask(ctx, "Machine.phases.reconcileNode")
defer task.End()

log := ctrl.LoggerFrom(ctx)

// Create a watch on the nodes in the Cluster.
Expand Down
Loading

0 comments on commit 795eaa3

Please sign in to comment.