From 8da2a1fcd1b12988b7da2780b735dad316f82ee0 Mon Sep 17 00:00:00 2001 From: Mikhail Montsev Date: Tue, 24 Sep 2024 16:30:36 +0100 Subject: [PATCH] blockstore & filestore SDK retriable error codes should be in sync with cloud/storage/core/libs/common/error.cpp (#2119) --- .../blockstore/public/sdk/go/client/error.go | 28 ++++++++++++++++-- .../public/sdk/python/client/error.py | 29 +++++++++++++++---- cloud/filestore/public/sdk/go/client/error.go | 12 ++++++-- .../public/sdk/python/client/error.py | 14 +++++---- 4 files changed, 68 insertions(+), 15 deletions(-) diff --git a/cloud/blockstore/public/sdk/go/client/error.go b/cloud/blockstore/public/sdk/go/client/error.go index fc7b5d6889..e366718142 100644 --- a/cloud/blockstore/public/sdk/go/client/error.go +++ b/cloud/blockstore/public/sdk/go/client/error.go @@ -44,8 +44,14 @@ func (e *ClientError) IsRetriable() bool { } switch e.Facility() { - case FACILITY_GRPC, FACILITY_SYSTEM: - // system/network errors should be retriable + case FACILITY_GRPC: + if e.Code == E_GRPC_UNIMPLEMENTED { + return false + } + // network errors should be retriable + return true + case FACILITY_SYSTEM: + // system errors should be retriable return true case FACILITY_KIKIMR: switch e.Status() { @@ -59,6 +65,24 @@ func (e *ClientError) IsRetriable() bool { 20: // NKikimrProto::NOT_YET return true } + case FACILITY_SCHEMESHARD: + switch e.Status() { + case + 13, // NKikimrScheme::StatusNotAvailable + 8: // NKikimrScheme::StatusMultipleModifications + return true + } + case FACILITY_TXPROXY: + switch e.Status() { + case + 16, // NKikimr::NTxProxy::TResultStatus::ProxyNotReady + 20, // NKikimr::NTxProxy::TResultStatus::ProxyShardNotAvailable + 21, // NKikimr::NTxProxy::TResultStatus::ProxyShardTryLater + 22, // NKikimr::NTxProxy::TResultStatus::ProxyShardOverloaded + 51, // NKikimr::NTxProxy::TResultStatus::ExecTimeout + 55: // NKikimr::NTxProxy::TResultStatus::ExecResultUnavailable + return true + } } // any other errors should not be 
retried automatically diff --git a/cloud/blockstore/public/sdk/python/client/error.py b/cloud/blockstore/public/sdk/python/client/error.py index 51aa256d03..0a3629298a 100644 --- a/cloud/blockstore/public/sdk/python/client/error.py +++ b/cloud/blockstore/public/sdk/python/client/error.py @@ -65,11 +65,14 @@ def is_retriable(self) -> bool: facility = self.facility - if facility in [ - EFacility.FACILITY_GRPC.value, - EFacility.FACILITY_SYSTEM.value, - ]: - # system/network errors should be retriable + if facility == EFacility.FACILITY_GRPC.value: + if self.code == EResult.E_GRPC_UNIMPLEMENTED.value: + return False + # network errors should be retriable + return True + + if facility == EFacility.FACILITY_SYSTEM.value: + # system errors should be retriable return True if facility == EFacility.FACILITY_KIKIMR.value and self.status in [ @@ -83,6 +86,22 @@ def is_retriable(self) -> bool: ]: return True + if facility == EFacility.FACILITY_SCHEMESHARD.value and self.status in [ + 13, # NKikimrScheme::StatusNotAvailable + 8, # NKikimrScheme::StatusMultipleModifications + ]: + return True + + if facility == EFacility.FACILITY_TXPROXY.value and self.status in [ + 16, # NKikimr::NTxProxy::TResultStatus::ProxyNotReady + 20, # NKikimr::NTxProxy::TResultStatus::ProxyShardNotAvailable + 21, # NKikimr::NTxProxy::TResultStatus::ProxyShardTryLater + 22, # NKikimr::NTxProxy::TResultStatus::ProxyShardOverloaded + 51, # NKikimr::NTxProxy::TResultStatus::ExecTimeout + 55, # NKikimr::NTxProxy::TResultStatus::ExecResultUnavailable + ]: + return True + # any other errors should not be retried automatically return False diff --git a/cloud/filestore/public/sdk/go/client/error.go b/cloud/filestore/public/sdk/go/client/error.go index 27936c3415..437895e0c5 100644 --- a/cloud/filestore/public/sdk/go/client/error.go +++ b/cloud/filestore/public/sdk/go/client/error.go @@ -38,14 +38,20 @@ func (e *ClientError) Error() string { func (e *ClientError) IsRetriable() bool { switch e.Code { - case 
E_REJECTED, E_TIMEOUT: + case E_REJECTED, E_TIMEOUT, E_FS_OUT_OF_SPACE: // special error code for retries return true } switch e.Facility() { - case FACILITY_GRPC, FACILITY_SYSTEM: - // system/network errors should be retriable + case FACILITY_GRPC: + if e.Code == E_GRPC_UNIMPLEMENTED { + return false + } + // network errors should be retriable + return true + case FACILITY_SYSTEM: + // system errors should be retriable return true case FACILITY_KIKIMR: switch e.Status() { diff --git a/cloud/filestore/public/sdk/python/client/error.py b/cloud/filestore/public/sdk/python/client/error.py index 821143c7e4..90bf50267b 100644 --- a/cloud/filestore/public/sdk/python/client/error.py +++ b/cloud/filestore/public/sdk/python/client/error.py @@ -57,16 +57,20 @@ def is_retriable(self): if self.code in [ EResult.E_REJECTED.value, EResult.E_TIMEOUT.value, + EResult.E_FS_OUT_OF_SPACE.value, ]: return True facility = self.facility - if facility in [ - EFacility.FACILITY_GRPC.value, - EFacility.FACILITY_SYSTEM.value, - ]: - # system/network errors should be retriable + if facility == EFacility.FACILITY_GRPC.value: + if self.code == EResult.E_GRPC_UNIMPLEMENTED.value: + return False + # network errors should be retriable + return True + + if facility == EFacility.FACILITY_SYSTEM.value: + # system errors should be retriable return True if facility == EFacility.FACILITY_KIKIMR.value and self.status in [