CNF-14356: Add ClusterNotReady reason to ConfigurationApplied condition
irinamihai committed Sep 16, 2024
1 parent af46bd1 commit a1cab9e
Showing 5 changed files with 734 additions and 86 deletions.
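Note: the hasPolicyConfigurationTimedOut hunk below replaces an inline time.Since comparison with a utils.TimeoutExceeded helper. The helper itself is not part of this diff; the following is a minimal sketch of what it plausibly does, reconstructed from the inline check being deleted (the function signature and the *oranv1alpha1.ClusterRequest parameter type are assumptions, not shown in this commit):

// Sketch only, not the committed implementation: reconstructed from the
// inline check removed from hasPolicyConfigurationTimedOut. Requires the
// "time" package.
func TimeoutExceeded(cr *oranv1alpha1.ClusterRequest) bool {
	// NonCompliantAt records when non-compliance was first observed;
	// Spec.Timeout.Configuration is the allowed window in minutes.
	return time.Since(cr.Status.ClusterInstanceRef.NonCompliantAt.Time) >
		time.Duration(cr.Spec.Timeout.Configuration)*time.Minute
}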
162 changes: 93 additions & 69 deletions internal/controllers/clusterrequest_controller.go
@@ -235,12 +235,11 @@ func (t *clusterRequestReconcilerTask) run(ctx context.Context) (ctrl.Result, er
 		}
 	}
 
 	// Handle policy configuration only after the cluster provisioning
 	// has started.
 	crProvisionedCond := meta.FindStatusCondition(
 		t.object.Status.Conditions, string(utils.CRconditionTypes.ClusterProvisioned))
-	if crProvisionedCond != nil &&
-		crProvisionedCond.Status == metav1.ConditionTrue &&
-		crProvisionedCond.Reason == string(utils.CRconditionReasons.Completed) {
+	// Handle configuration through policies.
+	if crProvisionedCond != nil {
 		requeue, err := t.handleClusterPolicyConfiguration(ctx)
 		if err != nil {
 			if utils.IsInputError(err) {
@@ -778,26 +777,6 @@ func (t *clusterRequestReconcilerTask) handleClusterPolicyConfiguration(ctx cont
 		return false, fmt.Errorf("status.clusterInstanceRef is empty")
 	}
 
-	clusterIsReadyForPolicyConfig, err := utils.ClusterIsReadyForPolicyConfig(
-		ctx, t.client, t.object.Status.ClusterInstanceRef.Name,
-	)
-	if err != nil {
-		return false, fmt.Errorf(
-			"error determining if the cluster is ready for policy configuration: %w", err)
-	}
-
-	if !clusterIsReadyForPolicyConfig {
-		t.logger.InfoContext(
-			ctx,
-			fmt.Sprintf(
-				"Cluster %s (%s) is not ready for policy configuration",
-				t.object.Status.ClusterInstanceRef.Name,
-				t.object.Status.ClusterInstanceRef.Name,
-			),
-		)
-		return false, nil
-	}
-
 	// Get all the child policies in the namespace of the managed cluster created through
 	// the ClusterRequest.
 	policies := &policiesv1.PolicyList{}
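The list call that fills this PolicyList is truncated out of the hunk above. A minimal sketch of how the child policies might be fetched, assuming the managed cluster's namespace matches ClusterInstanceRef.Name (the controller's actual list options are not shown in this commit):

// Sketch under the stated assumptions; uses the standard controller-runtime
// client.List with a namespace selector.
listOpts := []client.ListOption{
	client.InNamespace(t.object.Status.ClusterInstanceRef.Name),
}
if err := t.client.List(ctx, policies, listOpts...); err != nil {
	return false, fmt.Errorf("failed to list child policies: %w", err)
}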
@@ -865,15 +844,26 @@ func (t *clusterRequestReconcilerTask) hasPolicyConfigurationTimedOut(ctx contex
 	switch configurationAppliedCondition.Reason {
 	case string(utils.CRconditionReasons.InProgress):
 		// Check if the configuration application has timed out.
-		if time.Since(t.object.Status.ClusterInstanceRef.NonCompliantAt.Time) > time.Duration(t.object.Spec.Timeout.Configuration)*time.Minute {
-			policyTimedOut = true
+		if t.object.Status.ClusterInstanceRef.NonCompliantAt.IsZero() {
+			t.object.Status.ClusterInstanceRef.NonCompliantAt = metav1.Now()
+		} else {
+			// If NonCompliantAt has been previously set, check for timeout.
+			policyTimedOut = utils.TimeoutExceeded(t.object)
 		}
 	case string(utils.CRconditionReasons.TimedOut):
 		policyTimedOut = true
 	case string(utils.CRconditionReasons.Missing):
 		t.object.Status.ClusterInstanceRef.NonCompliantAt = metav1.Now()
 	case string(utils.CRconditionReasons.OutOfDate):
 		t.object.Status.ClusterInstanceRef.NonCompliantAt = metav1.Now()
+	case string(utils.CRconditionReasons.ClusterNotReady):
+		// The cluster might not be ready because it is being initially provisioned
+		// or because there are problems after provisioning, so NonCompliantAt might
+		// have been previously set.
+		if !t.object.Status.ClusterInstanceRef.NonCompliantAt.IsZero() {
+			// If NonCompliantAt has been previously set, check for timeout.
+			policyTimedOut = utils.TimeoutExceeded(t.object)
+		}
 	default:
 		t.logger.InfoContext(ctx,
 			fmt.Sprintf("Unexpected Reason for condition type %s",
@@ -892,7 +882,19 @@ func (t *clusterRequestReconcilerTask) hasPolicyConfigurationTimedOut(ctx contex
 // based on the state of the policies matched with the managed cluster.
 func (t *clusterRequestReconcilerTask) updateConfigurationAppliedStatus(
 	ctx context.Context, targetPolicies []oranv1alpha1.PolicyDetails, allPoliciesCompliant bool,
-	nonCompliantPolicyInEnforce bool) error {
+	nonCompliantPolicyInEnforce bool) (err error) {
+
+	defer func() {
+		t.object.Status.Policies = targetPolicies
+		// Update the current policy status on every exit path; preserve any
+		// error already set by the function body.
+		if updateErr := utils.UpdateK8sCRStatus(ctx, t.client, t.object); updateErr != nil {
+			err = fmt.Errorf("failed to update status for ClusterRequest %s: %w", t.object.Name, updateErr)
+		}
+	}()
+
 	if len(targetPolicies) == 0 {
 		t.object.Status.ClusterInstanceRef.NonCompliantAt = metav1.Time{}
 		utils.SetStatusCondition(&t.object.Status.Conditions,
Expand All @@ -901,52 +903,74 @@ func (t *clusterRequestReconcilerTask) updateConfigurationAppliedStatus(
metav1.ConditionFalse,
"No configuration present",
)
} else {
// Update the ConfigurationApplied condition.
if allPoliciesCompliant {
t.object.Status.ClusterInstanceRef.NonCompliantAt = metav1.Time{}
utils.SetStatusCondition(&t.object.Status.Conditions,
utils.CRconditionTypes.ConfigurationApplied,
utils.CRconditionReasons.Completed,
metav1.ConditionTrue,
"The configuration is up to date",
)
} else {
if nonCompliantPolicyInEnforce {
policyTimedOut := t.hasPolicyConfigurationTimedOut(ctx)

message := "The configuration is still being applied"
reason := utils.CRconditionReasons.InProgress
if policyTimedOut {
message += ", but it timed out"
reason = utils.CRconditionReasons.TimedOut
}
utils.SetStatusCondition(&t.object.Status.Conditions,
utils.CRconditionTypes.ConfigurationApplied,
reason,
metav1.ConditionFalse,
message,
)
} else {
// No timeout is reported if all policies are in inform, just out of date.
t.object.Status.ClusterInstanceRef.NonCompliantAt = metav1.Time{}
utils.SetStatusCondition(&t.object.Status.Conditions,
utils.CRconditionTypes.ConfigurationApplied,
utils.CRconditionReasons.OutOfDate,
metav1.ConditionFalse,
"The configuration is out of date",
)
}
}
return
}

t.object.Status.Policies = targetPolicies
// Update the current policy status.
if updateErr := utils.UpdateK8sCRStatus(ctx, t.client, t.object); updateErr != nil {
return fmt.Errorf("failed to update status for ClusterRequest %s: %w", t.object.Name, updateErr)
// Update the ConfigurationApplied condition.
if allPoliciesCompliant {
t.object.Status.ClusterInstanceRef.NonCompliantAt = metav1.Time{}
utils.SetStatusCondition(&t.object.Status.Conditions,
utils.CRconditionTypes.ConfigurationApplied,
utils.CRconditionReasons.Completed,
metav1.ConditionTrue,
"The configuration is up to date",
)
return
}

return nil
clusterIsReadyForPolicyConfig, err := utils.ClusterIsReadyForPolicyConfig(
ctx, t.client, t.object.Status.ClusterInstanceRef.Name,
)
if err != nil {
return fmt.Errorf(
"error determining if the cluster is ready for policy configuration: %w", err)
}

if !clusterIsReadyForPolicyConfig {
t.logger.InfoContext(
ctx,
fmt.Sprintf(
"Cluster %s (%s) is not ready for policy configuration",
t.object.Status.ClusterInstanceRef.Name,
t.object.Status.ClusterInstanceRef.Name,
),
)
utils.SetStatusCondition(&t.object.Status.Conditions,
utils.CRconditionTypes.ConfigurationApplied,
utils.CRconditionReasons.ClusterNotReady,
metav1.ConditionFalse,
"The Cluster is not yet ready",
)
return
}

if nonCompliantPolicyInEnforce {
policyTimedOut := t.hasPolicyConfigurationTimedOut(ctx)

message := "The configuration is still being applied"
reason := utils.CRconditionReasons.InProgress
if policyTimedOut {
message += ", but it timed out"
reason = utils.CRconditionReasons.TimedOut
}
utils.SetStatusCondition(&t.object.Status.Conditions,
utils.CRconditionTypes.ConfigurationApplied,
reason,
metav1.ConditionFalse,
message,
)
} else {
// No timeout is reported if all policies are in inform, just out of date.
t.object.Status.ClusterInstanceRef.NonCompliantAt = metav1.Time{}
utils.SetStatusCondition(&t.object.Status.Conditions,
utils.CRconditionTypes.ConfigurationApplied,
utils.CRconditionReasons.OutOfDate,
metav1.ConditionFalse,
"The configuration is out of date",
)
}

return
}

func (t *clusterRequestReconcilerTask) updateClusterInstanceProcessedStatus(ci *siteconfig.ClusterInstance, createOrPatchErr error) {
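The readiness gate now lives in updateConfigurationAppliedStatus, but the diff never shows utils.ClusterIsReadyForPolicyConfig itself. A plausible sketch, assuming the helper checks the ManagedCluster's availability condition via the open-cluster-management API; the real helper's criteria may well differ:

// Assumption: "ready for policy configuration" means the ManagedCluster
// exists and reports itself available. The actual utils helper may check
// more (for example hub acceptance or joined state).
import (
	"context"
	"fmt"

	"k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/meta"
	"k8s.io/apimachinery/pkg/types"
	clusterv1 "open-cluster-management.io/api/cluster/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

func ClusterIsReadyForPolicyConfig(
	ctx context.Context, c client.Client, clusterName string) (bool, error) {

	// ManagedCluster is cluster-scoped, so only the name is needed.
	cluster := &clusterv1.ManagedCluster{}
	if err := c.Get(ctx, types.NamespacedName{Name: clusterName}, cluster); err != nil {
		if errors.IsNotFound(err) {
			return false, nil
		}
		return false, fmt.Errorf("failed to get ManagedCluster %s: %w", clusterName, err)
	}
	return meta.IsStatusConditionTrue(
		cluster.Status.Conditions, clusterv1.ManagedClusterConditionAvailable), nil
}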

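With the new reason in place, a watcher of the ClusterRequest can distinguish a cluster that is not yet ready from a genuine configuration failure. A hypothetical consumer-side check, reusing the condition type and reason identifiers from the diff (clusterRequest here stands in for whatever object the consumer has fetched):

// Fragment: assumes the apimachinery meta and metav1 packages and the
// repo's utils constants are imported.
cond := meta.FindStatusCondition(
	clusterRequest.Status.Conditions,
	string(utils.CRconditionTypes.ConfigurationApplied))
if cond != nil && cond.Status == metav1.ConditionFalse &&
	cond.Reason == string(utils.CRconditionReasons.ClusterNotReady) {
	// The cluster is still coming up (or has gone unavailable); configuration
	// is retried once the cluster is ready, so this is not a configuration error.
	log.Printf("cluster not ready, configuration pending: %s", cond.Message)
}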