Skip to content

Commit

Permalink
feat(verification): improve transient error handling (#1650)
Browse files Browse the repository at this point in the history
Signed-off-by: Hidde Beydals <[email protected]>
  • Loading branch information
hiddeco committed Apr 9, 2024
1 parent 33e702a commit 0c5b3b7
Show file tree
Hide file tree
Showing 8 changed files with 436 additions and 69 deletions.
6 changes: 6 additions & 0 deletions api/v1alpha1/stage_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -738,6 +738,12 @@ type VerificationInfo struct {
AnalysisRun *AnalysisRunReference `json:"analysisRun,omitempty" protobuf:"bytes,3,opt,name=analysisRun"`
}

// HasAnalysisRun returns a bool indicating whether the VerificationInfo has an
// associated AnalysisRun.
func (v *VerificationInfo) HasAnalysisRun() bool {
return v != nil && v.AnalysisRun != nil
}

type VerificationInfoStack []VerificationInfo

// UpdateOrPush updates the VerificationInfo with the same ID as the provided
Expand Down
31 changes: 31 additions & 0 deletions api/v1alpha1/stage_types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,37 @@ import (
"github.com/stretchr/testify/require"
)

func TestVerificationInfo_HasAnalysisRun(t *testing.T) {
testCases := []struct {
name string
info *VerificationInfo
expectedResult bool
}{
{
name: "VerificationInfo is nil",
info: nil,
expectedResult: false,
},
{
name: "AnalysisRun is nil",
info: &VerificationInfo{},
expectedResult: false,
},
{
name: "AnalysisRun is not nil",
info: &VerificationInfo{
AnalysisRun: &AnalysisRunReference{},
},
expectedResult: true,
},
}
for _, testCase := range testCases {
t.Run(testCase.name, func(t *testing.T) {
require.Equal(t, testCase.expectedResult, testCase.info.HasAnalysisRun())
})
}
}

func TestFreightReferenceStackUpdateOrPush(t *testing.T) {
testCases := []struct {
name string
Expand Down
28 changes: 23 additions & 5 deletions internal/controller/stages/stages.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ type reconciler struct {
startVerificationFn func(
context.Context,
*kargoapi.Stage,
) *kargoapi.VerificationInfo
) (*kargoapi.VerificationInfo, error)

abortVerificationFn func(
context.Context,
Expand All @@ -96,7 +96,7 @@ type reconciler struct {
getVerificationInfoFn func(
context.Context,
*kargoapi.Stage,
) *kargoapi.VerificationInfo
) (*kargoapi.VerificationInfo, error)

getAnalysisTemplateFn func(
context.Context,
Expand Down Expand Up @@ -759,14 +759,32 @@ func (r *reconciler) syncNormalStage(
// check if verification step is necessary and if yes execute
// step irrespective of phase
if status.Phase == kargoapi.StagePhaseVerifying || status.Phase == kargoapi.StagePhaseNotApplicable {
if status.CurrentFreight.VerificationInfo == nil {
if !status.CurrentFreight.VerificationInfo.HasAnalysisRun() {
if status.Health == nil || status.Health.Status == kargoapi.HealthStateHealthy {
log.Debug("starting verification")
status.CurrentFreight.VerificationInfo = r.startVerificationFn(ctx, stage)
var err error
if status.CurrentFreight.VerificationInfo, err = r.startVerificationFn(
ctx,
stage,
); err != nil && !status.CurrentFreight.VerificationInfo.HasAnalysisRun() {
status.CurrentFreight.VerificationHistory.UpdateOrPush(
*status.CurrentFreight.VerificationInfo,
)
return status, fmt.Errorf("error starting verification: %w", err)
}
}
} else {
log.Debug("checking verification results")
status.CurrentFreight.VerificationInfo = r.getVerificationInfoFn(ctx, stage)
var err error
if status.CurrentFreight.VerificationInfo, err = r.getVerificationInfoFn(
ctx,
stage,
); err != nil && status.CurrentFreight.VerificationInfo.HasAnalysisRun() {
status.CurrentFreight.VerificationHistory.UpdateOrPush(
*status.CurrentFreight.VerificationInfo,
)
return status, fmt.Errorf("error getting verification info: %w", err)
}

// Abort the verification if it's still running and the Stage has
// been marked to do so.
Expand Down
176 changes: 162 additions & 14 deletions internal/controller/stages/stages_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -364,15 +364,15 @@ func TestSyncNormalStage(t *testing.T) {
startVerificationFn: func(
context.Context,
*kargoapi.Stage,
) *kargoapi.VerificationInfo {
) (*kargoapi.VerificationInfo, error) {
return &kargoapi.VerificationInfo{
ID: "new-fake-id",
Phase: kargoapi.VerificationPhasePending,
Message: "Awaiting reconfirmation",
AnalysisRun: &kargoapi.AnalysisRunReference{
Name: "new-fake-analysis-run",
},
}
}, nil
},
},
assertions: func(
Expand Down Expand Up @@ -477,11 +477,11 @@ func TestSyncNormalStage(t *testing.T) {
startVerificationFn: func(
context.Context,
*kargoapi.Stage,
) *kargoapi.VerificationInfo {
) (*kargoapi.VerificationInfo, error) {
return &kargoapi.VerificationInfo{
Phase: kargoapi.VerificationPhaseError,
Message: "something went wrong",
}
}, nil
},
},
assertions: func(
Expand Down Expand Up @@ -517,6 +517,71 @@ func TestSyncNormalStage(t *testing.T) {
},
},

{
name: "retryable error starting verification",
stage: &kargoapi.Stage{
Spec: &kargoapi.StageSpec{
PromotionMechanisms: &kargoapi.PromotionMechanisms{},
Verification: &kargoapi.Verification{},
},
Status: kargoapi.StageStatus{
Phase: kargoapi.StagePhaseVerifying,
CurrentFreight: &kargoapi.FreightReference{},
},
},
reconciler: &reconciler{
hasNonTerminalPromotionsFn: noNonTerminalPromotionsFn,
checkHealthFn: func(
context.Context,
kargoapi.FreightReference,
[]kargoapi.ArgoCDAppUpdate,
) *kargoapi.Health {
return nil
},
startVerificationFn: func(
context.Context,
*kargoapi.Stage,
) (*kargoapi.VerificationInfo, error) {
return &kargoapi.VerificationInfo{
Phase: kargoapi.VerificationPhaseError,
Message: "something went wrong",
}, errors.New("retryable error")
},
},
assertions: func(
t *testing.T,
initialStatus kargoapi.StageStatus,
newStatus kargoapi.StageStatus,
err error,
) {
require.ErrorContains(t, err, "retryable error")
require.NotNil(t, newStatus.CurrentFreight)
require.Equal(t, kargoapi.StagePhaseVerifying, newStatus.Phase)

expectInfo := kargoapi.VerificationInfo{
Phase: kargoapi.VerificationPhaseError,
Message: "something went wrong",
}

require.Equal(
t,
&expectInfo,
newStatus.CurrentFreight.VerificationInfo,
)
require.Equal(
t,
kargoapi.VerificationInfoStack{expectInfo},
newStatus.CurrentFreight.VerificationHistory,
)

// Everything else should be returned unchanged
newStatus.CurrentFreight.VerificationInfo = nil
newStatus.CurrentFreight.VerificationHistory = nil
newStatus.Phase = initialStatus.Phase
require.Equal(t, initialStatus, newStatus)
},
},

{
name: "error checking verification result",
stage: &kargoapi.Stage{
Expand All @@ -527,7 +592,13 @@ func TestSyncNormalStage(t *testing.T) {
Status: kargoapi.StageStatus{
Phase: kargoapi.StagePhaseVerifying,
CurrentFreight: &kargoapi.FreightReference{
VerificationInfo: &kargoapi.VerificationInfo{},
VerificationInfo: &kargoapi.VerificationInfo{
Phase: kargoapi.VerificationPhasePending,
AnalysisRun: &kargoapi.AnalysisRunReference{
Name: "fake-analysis-run",
Namespace: "fake-namespace",
},
},
},
},
},
Expand All @@ -540,11 +611,11 @@ func TestSyncNormalStage(t *testing.T) {
) *kargoapi.Health {
return nil
},
getVerificationInfoFn: func(_ context.Context, _ *kargoapi.Stage) *kargoapi.VerificationInfo {
getVerificationInfoFn: func(_ context.Context, _ *kargoapi.Stage) (*kargoapi.VerificationInfo, error) {
return &kargoapi.VerificationInfo{
Phase: kargoapi.VerificationPhaseError,
Message: "something went wrong",
}
}, nil
},
},
assertions: func(
Expand Down Expand Up @@ -572,6 +643,77 @@ func TestSyncNormalStage(t *testing.T) {
},
},

{
name: "retryable error checking verification result",
stage: &kargoapi.Stage{
Spec: &kargoapi.StageSpec{
PromotionMechanisms: &kargoapi.PromotionMechanisms{},
Verification: &kargoapi.Verification{},
},
Status: kargoapi.StageStatus{
Phase: kargoapi.StagePhaseVerifying,
CurrentFreight: &kargoapi.FreightReference{
VerificationInfo: &kargoapi.VerificationInfo{
Phase: kargoapi.VerificationPhasePending,
AnalysisRun: &kargoapi.AnalysisRunReference{
Name: "fake-analysis-run",
Namespace: "fake-namespace",
},
},
},
},
},
reconciler: &reconciler{
hasNonTerminalPromotionsFn: noNonTerminalPromotionsFn,
checkHealthFn: func(
context.Context,
kargoapi.FreightReference,
[]kargoapi.ArgoCDAppUpdate,
) *kargoapi.Health {
return nil
},
getVerificationInfoFn: func(_ context.Context, _ *kargoapi.Stage) (*kargoapi.VerificationInfo, error) {
return &kargoapi.VerificationInfo{
Phase: kargoapi.VerificationPhaseError,
Message: "something went wrong",
AnalysisRun: &kargoapi.AnalysisRunReference{
Name: "fake-analysis-run",
Namespace: "fake-namespace",
},
}, errors.New("retryable error")
},
},
assertions: func(
t *testing.T,
initialStatus kargoapi.StageStatus,
newStatus kargoapi.StageStatus,
err error,
) {
require.ErrorContains(t, err, "retryable error")
require.NotNil(t, newStatus.CurrentFreight)
require.Equal(
t,
&kargoapi.VerificationInfo{
Phase: kargoapi.VerificationPhaseError,
Message: "something went wrong",
AnalysisRun: &kargoapi.AnalysisRunReference{
Name: "fake-analysis-run",
Namespace: "fake-namespace",
},
},
newStatus.CurrentFreight.VerificationInfo,
)
require.Len(t, newStatus.CurrentFreight.VerificationHistory, 1)

// Phase should not be changed to Steady
require.Equal(t, kargoapi.StagePhaseVerifying, newStatus.Phase)
// Everything else should be unchanged
newStatus.Phase = initialStatus.Phase
newStatus.CurrentFreight = initialStatus.CurrentFreight
require.Equal(t, initialStatus, newStatus)
},
},

{
name: "verification aborted",
stage: &kargoapi.Stage{
Expand Down Expand Up @@ -609,8 +751,8 @@ func TestSyncNormalStage(t *testing.T) {
getVerificationInfoFn: func(
_ context.Context,
s *kargoapi.Stage,
) *kargoapi.VerificationInfo {
return s.Status.CurrentFreight.VerificationInfo
) (*kargoapi.VerificationInfo, error) {
return s.Status.CurrentFreight.VerificationInfo, nil
},
abortVerificationFn: func(
context.Context,
Expand Down Expand Up @@ -681,10 +823,10 @@ func TestSyncNormalStage(t *testing.T) {
getVerificationInfoFn: func(
_ context.Context,
s *kargoapi.Stage,
) *kargoapi.VerificationInfo {
) (*kargoapi.VerificationInfo, error) {
i := s.Status.CurrentFreight.VerificationInfo.DeepCopy()
i.Phase = kargoapi.VerificationPhaseError
return i
return i, nil
},
abortVerificationFn: func(
context.Context,
Expand Down Expand Up @@ -1172,7 +1314,13 @@ func TestSyncNormalStage(t *testing.T) {
Phase: kargoapi.StagePhaseVerifying,
CurrentPromotion: &kargoapi.PromotionInfo{},
CurrentFreight: &kargoapi.FreightReference{
VerificationInfo: &kargoapi.VerificationInfo{},
VerificationInfo: &kargoapi.VerificationInfo{
Phase: kargoapi.VerificationPhasePending,
AnalysisRun: &kargoapi.AnalysisRunReference{
Name: "fake-analysis-run",
Namespace: "fake-namespace",
},
},
},
},
},
Expand All @@ -1190,15 +1338,15 @@ func TestSyncNormalStage(t *testing.T) {
getVerificationInfoFn: func(
context.Context,
*kargoapi.Stage,
) *kargoapi.VerificationInfo {
) (*kargoapi.VerificationInfo, error) {
return &kargoapi.VerificationInfo{
Phase: kargoapi.VerificationPhaseSuccessful,
AnalysisRun: &kargoapi.AnalysisRunReference{
Name: "fake-analysis-run",
Namespace: "fake-namespace",
Phase: string(rollouts.AnalysisPhaseSuccessful),
},
}
}, nil
},
verifyFreightInStageFn: func(context.Context, string, string, string) error {
return nil
Expand Down
Loading

0 comments on commit 0c5b3b7

Please sign in to comment.