diff --git a/helm/tailing-sidecar-operator/conf/operator.yaml b/helm/tailing-sidecar-operator/conf/operator.yaml index 39f54171..a7d2adf7 100644 --- a/helm/tailing-sidecar-operator/conf/operator.yaml +++ b/helm/tailing-sidecar-operator/conf/operator.yaml @@ -2,3 +2,7 @@ sidecar: image: {{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag | default .Chart.AppVersion }} resources: {{- .Values.sidecar.resources | toYaml | nindent 4 }} +leaderElection: + leaseDuration: {{ .Values.operator.leaderElection.leaseDuration }} + renewDeadline: {{ .Values.operator.leaderElection.renewDeadline }} + retryPeriod: {{ .Values.operator.leaderElection.retryPeriod }} diff --git a/helm/tailing-sidecar-operator/templates/resources.yaml b/helm/tailing-sidecar-operator/templates/resources.yaml index d7d3347c..9c5b028c 100644 --- a/helm/tailing-sidecar-operator/templates/resources.yaml +++ b/helm/tailing-sidecar-operator/templates/resources.yaml @@ -443,7 +443,9 @@ spec: name: https - args: - --metrics-addr=127.0.0.1:8080 + {{- if .Values.operator.leaderElection.enabled }} - --enable-leader-election + {{- end }} - --config=/tailing-sidecar/config/config.yaml command: - /manager diff --git a/helm/tailing-sidecar-operator/values.yaml b/helm/tailing-sidecar-operator/values.yaml index dc5134d1..b3727048 100644 --- a/helm/tailing-sidecar-operator/values.yaml +++ b/helm/tailing-sidecar-operator/values.yaml @@ -14,6 +14,15 @@ operator: requests: cpu: 100m memory: 20Mi + + ## Enable leader election mechanism for protecting against split brain if multiple operator pods/replicas are started. 
+ ## See more at https://docs.openshift.com/container-platform/4.10/operators/operator_sdk/osdk-leader-election.html + leaderElection: + enabled: true + leaseDuration: "137s" + renewDeadline: "107s" + retryPeriod: "26s" + livenessProbe: {} # initialDelaySeconds: 1 # periodSeconds: 20 diff --git a/operator/config.go b/operator/config.go index b7072dbd..d2d76af4 100644 --- a/operator/config.go +++ b/operator/config.go @@ -1,7 +1,10 @@ package main import ( + "encoding/json" + "errors" "os" + "time" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -9,7 +12,8 @@ import ( ) type Config struct { - Sidecar SidecarConfig `yaml:"sidecar,omitempty"` + Sidecar SidecarConfig `yaml:"sidecar,omitempty"` + LeaderElection LeaderElectionConfig `yaml:"leaderElection,omitempty"` } type SidecarConfig struct { @@ -17,6 +21,40 @@ type SidecarConfig struct { Resources corev1.ResourceRequirements `yaml:"resources,omitempty"` } +type LeaderElectionConfig struct { + LeaseDuration Duration `yaml:"leaseDuration,omitempty"` + RenewDeadline Duration `yaml:"renewDeadline,omitempty"` + RetryPeriod Duration `yaml:"retryPeriod,omitempty"` +} + +// Duration wraps time.Duration because sigs.k8s.io/yaml does not support time.Duration; see https://github.com/kubernetes-sigs/yaml/issues/64 +type Duration time.Duration + +func (d Duration) MarshalJSON() ([]byte, error) { + return json.Marshal(time.Duration(d).String()) +} + +func (d *Duration) UnmarshalJSON(b []byte) error { + var v interface{} + if err := json.Unmarshal(b, &v); err != nil { + return err + } + switch value := v.(type) { + case float64: + *d = Duration(time.Duration(value)) + return nil + case string: + tmp, err := time.ParseDuration(value) + if err != nil { + return err + } + *d = Duration(tmp) + return nil + default: + return errors.New("invalid duration") + } +} + func ReadConfig(configPath string, config *Config) error { content, err := os.ReadFile(configPath) if err != nil { @@ -48,5 +86,11 @@ func GetDefaultConfig() Config { }, }, }, + // reference 
for values: https://github.com/open-telemetry/opentelemetry-operator/blob/a8653601cd6a6e2b35fd7f3e1a28b4e9608fb794/main.go#L181 + LeaderElection: LeaderElectionConfig{ + LeaseDuration: Duration(time.Second * 137), + RenewDeadline: Duration(time.Second * 107), + RetryPeriod: Duration(time.Second * 26), + }, } } diff --git a/operator/config_test.go b/operator/config_test.go index 6de8a132..383d2f6f 100644 --- a/operator/config_test.go +++ b/operator/config_test.go @@ -4,6 +4,7 @@ import ( "io/ioutil" "os" "testing" + "time" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" @@ -34,6 +35,11 @@ func TestReadConfig(t *testing.T) { }, }, }, + LeaderElection: LeaderElectionConfig{ + LeaseDuration: Duration(time.Second * 137), + RenewDeadline: Duration(time.Second * 107), + RetryPeriod: Duration(time.Second * 26), + }, }, expectedError: nil, }, @@ -58,6 +64,11 @@ sidecar: }, }, }, + LeaderElection: LeaderElectionConfig{ + LeaseDuration: Duration(time.Second * 137), + RenewDeadline: Duration(time.Second * 107), + RetryPeriod: Duration(time.Second * 26), + }, }, expectedError: nil, }, @@ -72,7 +83,11 @@ sidecar: memory: 400Mi requests: cpu: 20m - memory: 20Mi`, + memory: 20Mi +leaderElection: + leaseDuration: 10s + renewDeadline: 10s + retryPeriod: 10s`, expected: Config{ Sidecar: SidecarConfig{ Image: "my-new-image", @@ -87,6 +102,11 @@ sidecar: }, }, }, + LeaderElection: LeaderElectionConfig{ + LeaseDuration: Duration(time.Second * 10), + RenewDeadline: Duration(time.Second * 10), + RetryPeriod: Duration(time.Second * 10), + }, }, expectedError: nil, }, diff --git a/operator/main.go b/operator/main.go index 936e61a3..ea10da48 100644 --- a/operator/main.go +++ b/operator/main.go @@ -19,6 +19,7 @@ package main import ( "flag" "os" + "time" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" @@ -85,6 +86,9 @@ func main() { Port: 9443, LeaderElection: enableLeaderElection, LeaderElectionID: "7b555970.sumologic.com", + 
LeaseDuration: (*time.Duration)(&config.LeaderElection.LeaseDuration), + RenewDeadline: (*time.Duration)(&config.LeaderElection.RenewDeadline), + RetryPeriod: (*time.Duration)(&config.LeaderElection.RetryPeriod), }) if err != nil { setupLog.Error(err, "unable to start manager")