diff --git a/cloud/filestore/libs/storage/service/service_actor_createhandle.cpp b/cloud/filestore/libs/storage/service/service_actor_createhandle.cpp index 2117535ab7..829f5dd636 100644 --- a/cloud/filestore/libs/storage/service/service_actor_createhandle.cpp +++ b/cloud/filestore/libs/storage/service/service_actor_createhandle.cpp @@ -264,7 +264,7 @@ void TStorageServiceActor::HandleCreateHandle( ctx, TFileStoreComponents::SERVICE, "create handle in follower %s", - msg->Record.DebugString().c_str()); + msg->Record.ShortDebugString().Quote().c_str()); auto [cookie, inflight] = CreateInFlightRequest( TRequestInfo(ev->Sender, ev->Cookie, msg->CallContext), diff --git a/cloud/filestore/libs/storage/service/service_actor_listnodes.cpp b/cloud/filestore/libs/storage/service/service_actor_listnodes.cpp index eb2859ad77..d3a45ea995 100644 --- a/cloud/filestore/libs/storage/service/service_actor_listnodes.cpp +++ b/cloud/filestore/libs/storage/service/service_actor_listnodes.cpp @@ -198,7 +198,7 @@ void TListNodesActor::HandleGetNodeAttrResponse( return; } - LOG_INFO( + LOG_DEBUG( ctx, TFileStoreComponents::SERVICE, "GetNodeAttrResponse from follower: %s", diff --git a/cloud/filestore/libs/storage/service/service_ut.cpp b/cloud/filestore/libs/storage/service/service_ut.cpp index 3dfda60f99..cb292e6d5c 100644 --- a/cloud/filestore/libs/storage/service/service_ut.cpp +++ b/cloud/filestore/libs/storage/service/service_ut.cpp @@ -3144,6 +3144,82 @@ Y_UNIT_TEST_SUITE(TStorageServiceTest) data.Size())->Record; UNIT_ASSERT_VALUES_EQUAL(data, readDataResponse.GetBuffer()); } + + Y_UNIT_TEST(ShouldHandleCreateNodeErrorFromFollowerUponCreateHandleViaLeader) + { + NProto::TStorageConfig config; + config.SetMultiTabletForwardingEnabled(true); + TTestEnv env({}, config); + env.CreateSubDomain("nfs"); + + ui32 nodeIdx = env.CreateNode("nfs"); + + const TString fsId = "test"; + const auto shard1Id = fsId + "-f1"; + const auto shard2Id = fsId + "-f2"; + + TServiceClient service(env.GetRuntime(), nodeIdx); + service.CreateFileStore(fsId, 1'000); + service.CreateFileStore(shard1Id, 1'000); + service.CreateFileStore(shard2Id, 1'000); + + ConfigureFollowers(service, fsId, shard1Id, shard2Id); + + auto headers = service.InitSession(fsId, "client"); + + auto error = MakeError(E_FS_INVALID_SESSION, "bad session"); + + env.GetRuntime().SetEventFilter( + [&] (TTestActorRuntimeBase&, TAutoPtr& event) { + switch (event->GetTypeRewrite()) { + case TEvService::EvCreateNodeResponse: { + auto* msg = + event->Get(); + if (error.GetCode()) { + msg->Record.MutableError()->CopyFrom(error); + } + + break; + } + } + + return false; + }); + + const ui64 requestId = 111; + + service.SendCreateHandleRequest( + headers, + fsId, + RootNodeId, + "file1", + TCreateHandleArgs::CREATE_EXL, + "", + requestId); + + auto response = service.RecvCreateHandleResponse(); + UNIT_ASSERT_VALUES_EQUAL_C( + error.GetCode(), + response->GetError().GetCode(), + FormatError(response->GetError())); + + error = {}; + + service.SendCreateHandleRequest( + headers, + fsId, + RootNodeId, + "file1", + TCreateHandleArgs::CREATE_EXL, + "", + requestId); + + response = service.RecvCreateHandleResponse(); + UNIT_ASSERT_VALUES_EQUAL_C( + error.GetCode(), + response->GetError().GetCode(), + FormatError(response->GetError())); + } } } // namespace NCloud::NFileStore::NStorage diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor.cpp index ecd4b60542..7720513cac 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor.cpp @@ -836,6 +836,13 @@ STFUNC(TIndexTabletActor::StateZombie) IgnoreFunc(TEvLocal::TEvTabletMetrics); IgnoreFunc(TEvHiveProxy::TEvReassignTabletResponse); + HFunc( + TEvIndexTabletPrivate::TEvNodeCreatedInFollower, + HandleNodeCreatedInFollower); + HFunc( + TEvIndexTabletPrivate::TEvNodeCreatedInFollowerUponCreateHandle, + HandleNodeCreatedInFollowerUponCreateHandle); + default: HandleUnexpectedEvent(ev, TFileStoreComponents::TABLET); break; @@ -869,6 +876,13 @@ STFUNC(TIndexTabletActor::StateBroken) IgnoreFunc(TEvHiveProxy::TEvReassignTabletResponse); + HFunc( + TEvIndexTabletPrivate::TEvNodeCreatedInFollower, + HandleNodeCreatedInFollower); + HFunc( + TEvIndexTabletPrivate::TEvNodeCreatedInFollowerUponCreateHandle, + HandleNodeCreatedInFollowerUponCreateHandle); + default: HandleUnexpectedEvent(ev, TFileStoreComponents::TABLET); break; diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_createhandle.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_createhandle.cpp index 369dec8ab8..a43cf3c74f 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_createhandle.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_createhandle.cpp @@ -54,7 +54,6 @@ class TCreateNodeInFollowerUponCreateHandleActor final const NProto::TNode Attrs; NProto::THeaders Headers; NProto::TCreateHandleResponse Response; - NProto::TCreateNodeResponse FollowerResponse; public: TCreateNodeInFollowerUponCreateHandleActor( @@ -165,8 +164,7 @@ void TCreateNodeInFollowerUponCreateHandleActor::HandleCreateNodeResponse( FollowerId.c_str(), FollowerName.c_str()); - FollowerResponse = std::move(msg->Record); - Response.MutableNodeAttr()->CopyFrom(FollowerResponse.GetNode()); + *Response.MutableNodeAttr() = std::move(*msg->Record.MutableNode()); ReplyAndDie(ctx, {}); } @@ -188,13 +186,15 @@ void TCreateNodeInFollowerUponCreateHandleActor::ReplyAndDie( *Response.MutableError() = std::move(error); } + // TODO(#1350): reply directly to the client (not to the tablet) upon + // poisoning + // or keep this RequestInfo in the ActiveTransactions list until this event + // gets processed by the tablet using TResponse = TEvIndexTabletPrivate::TEvNodeCreatedInFollowerUponCreateHandle; ctx.Send(ParentId, std::make_unique( std::move(RequestInfo), - std::move(FollowerResponse), - std::move(Response) - )); + std::move(Response))); Die(ctx); } @@ -500,6 +500,10 @@ void TIndexTabletActor::CompleteTx_CreateHandle( { RemoveTransaction(*args.RequestInfo); + if (!HasError(args.Error)) { + CommitDupCacheEntry(args.SessionId, args.RequestId); + } + if (args.IsNewFollowerNode && !HasError(args.Error)) { LOG_INFO(ctx, TFileStoreComponents::TABLET, "%s Creating node in follower upon CreateHandle: %s, %s", @@ -527,7 +531,6 @@ void TIndexTabletActor::CompleteTx_CreateHandle( std::make_unique(args.Error); if (!HasError(args.Error)) { - CommitDupCacheEntry(args.SessionId, args.RequestId); response->Record = std::move(args.Response); } @@ -547,12 +550,8 @@ void TIndexTabletActor::HandleNodeCreatedInFollowerUponCreateHandle( { auto* msg = ev->Get(); - auto response = std::make_unique( - msg->FollowerCreateNodeResponse.GetError()); - - if (!HasError(response->GetError())) { - response->Record = std::move(msg->CreateHandleResponse); - } + auto response = std::make_unique(); + response->Record = std::move(msg->CreateHandleResponse); CompleteResponse( response->Record, diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_createnode.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_createnode.cpp index cc8099d6ea..4bf0496c35 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_createnode.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_createnode.cpp @@ -81,7 +81,6 @@ class TCreateNodeInFollowerActor final const TActorId ParentId; NProto::TCreateNodeRequest Request; NProto::TCreateNodeResponse Response; - NProto::TCreateNodeResponse FollowerResponse; public: TCreateNodeInFollowerActor( @@ -186,7 +185,7 @@ void TCreateNodeInFollowerActor::HandleCreateNodeResponse( FollowerId.c_str(), FollowerName.c_str()); - FollowerResponse = std::move(msg->Record); + *Response.MutableNode() = std::move(*msg->Record.MutableNode()); ReplyAndDie(ctx, {}); } @@ -212,9 +211,7 @@ void TCreateNodeInFollowerActor::ReplyAndDie( using TResponse = TEvIndexTabletPrivate::TEvNodeCreatedInFollower; ctx.Send(ParentId, std::make_unique( std::move(RequestInfo), - std::move(FollowerResponse), - std::move(Response) - )); + std::move(Response))); Die(ctx); } @@ -520,14 +517,8 @@ void TIndexTabletActor::HandleNodeCreatedInFollower( { auto* msg = ev->Get(); - auto response = std::make_unique( - msg->FollowerCreateNodeResponse.GetError()); - - if (!HasError(response->GetError())) { - response->Record = std::move(msg->CreateNodeResponse); - *response->Record.MutableNode() = - std::move(*msg->FollowerCreateNodeResponse.MutableNode()); - } + auto response = std::make_unique(); + response->Record = std::move(msg->CreateNodeResponse); CompleteResponse( response->Record, diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_destroysession.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_destroysession.cpp index 751189ce3b..61242a02f0 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_destroysession.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_destroysession.cpp @@ -165,6 +165,8 @@ void TIndexTabletActor::CompleteTx_DestroySession( { RemoveTransaction(*args.RequestInfo); + // TODO(#1350): destroy follower sessions + auto response = std::make_unique(); NCloud::Reply(ctx, *args.RequestInfo, std::move(response)); } diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_unlinknode.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_unlinknode.cpp index 7d7591677b..1c74980ac1 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_unlinknode.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_unlinknode.cpp @@ -152,6 +152,7 @@ void TIndexTabletActor::ExecuteTx_UnlinkNode( // TODO(#1350): unlink external nodes if (args.ChildRef->FollowerId) { + // XXX ref not unlinked as well return; } diff --git a/cloud/filestore/libs/storage/tablet/tablet_private.h b/cloud/filestore/libs/storage/tablet/tablet_private.h index e0a6b16b2f..c09331c538 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_private.h +++ b/cloud/filestore/libs/storage/tablet/tablet_private.h @@ -502,15 +502,12 @@ struct TEvIndexTabletPrivate struct TNodeCreatedInFollower { TRequestInfoPtr RequestInfo; - NProto::TCreateNodeResponse FollowerCreateNodeResponse; NProto::TCreateNodeResponse CreateNodeResponse; TNodeCreatedInFollower( TRequestInfoPtr requestInfo, - NProto::TCreateNodeResponse followerCreateNodeResponse, NProto::TCreateNodeResponse createNodeResponse) : RequestInfo(std::move(requestInfo)) - , FollowerCreateNodeResponse(std::move(followerCreateNodeResponse)) , CreateNodeResponse(std::move(createNodeResponse)) { } @@ -523,15 +520,12 @@ struct TEvIndexTabletPrivate struct TNodeCreatedInFollowerUponCreateHandle { TRequestInfoPtr RequestInfo; - NProto::TCreateNodeResponse FollowerCreateNodeResponse; NProto::TCreateHandleResponse CreateHandleResponse; TNodeCreatedInFollowerUponCreateHandle( TRequestInfoPtr requestInfo, - NProto::TCreateNodeResponse followerCreateNodeResponse, NProto::TCreateHandleResponse createHandleResponse) : RequestInfo(std::move(requestInfo)) - , FollowerCreateNodeResponse(std::move(followerCreateNodeResponse)) , CreateHandleResponse(std::move(createHandleResponse)) { } diff --git a/cloud/filestore/libs/storage/tablet_proxy/tablet_proxy_actor.cpp b/cloud/filestore/libs/storage/tablet_proxy/tablet_proxy_actor.cpp index dca310a937..6caced6b4a 100644 --- a/cloud/filestore/libs/storage/tablet_proxy/tablet_proxy_actor.cpp +++ b/cloud/filestore/libs/storage/tablet_proxy/tablet_proxy_actor.cpp @@ -15,10 +15,6 @@ namespace { //////////////////////////////////////////////////////////////////////////////// -constexpr TDuration PipeInactivityTimeout = TDuration::Minutes(1); - -//////////////////////////////////////////////////////////////////////////////// - std::unique_ptr CreateTabletPipeClientCache( const TStorageConfig& config) { @@ -122,7 +118,7 @@ void TIndexTabletProxyActor::OnConnectionError( // will wait for tablet to recover conn.State = FAILED; - CancelActiveRequests(ctx, conn); + CancelActiveRequests(conn); ProcessPendingRequests(ctx, conn); } @@ -138,9 +134,7 @@ void TIndexTabletProxyActor::ProcessPendingRequests( } } -void TIndexTabletProxyActor::CancelActiveRequests( - const TActorContext& ctx, - TConnection& conn) +void TIndexTabletProxyActor::CancelActiveRequests(TConnection& conn) { for (auto it = ActiveRequests.begin(); it != ActiveRequests.end(); ) { if (it->second.ConnectionId == conn.Id) { @@ -152,9 +146,6 @@ void TIndexTabletProxyActor::CancelActiveRequests( ++it; } } - - conn.RequestsInflight = 0; - conn.LastActivity = ctx.Now(); } void TIndexTabletProxyActor::PostponeRequest( @@ -199,7 +190,6 @@ void TIndexTabletProxyActor::ForwardRequest( ActiveRequests.emplace( requestId, TActiveRequest(conn.Id, IEventHandlePtr(ev.Release()))); - ++conn.RequestsInflight; } void TIndexTabletProxyActor::DescribeFileStore( @@ -219,56 +209,6 @@ void TIndexTabletProxyActor::DescribeFileStore( //////////////////////////////////////////////////////////////////////////////// -void TIndexTabletProxyActor::ScheduleConnectionShutdown( - const TActorContext& ctx, - TConnection& conn) -{ - if (!conn.ActivityCheckScheduled && - conn.Requests.empty() && - !conn.RequestsInflight) - { - conn.ActivityCheckScheduled = true; - ctx.Schedule(PipeInactivityTimeout, new TEvents::TEvWakeup(conn.Id)); - } -} - -void TIndexTabletProxyActor::HandleWakeup( - const TEvents::TEvWakeup::TPtr& ev, - const TActorContext& ctx) -{ - const auto* msg = ev->Get(); - - auto it = ConnectionById.find(msg->Tag); - if (it == ConnectionById.end()) { - // connection is already closed, nothing to do - return; - } - - TConnection* conn = it->second; - conn->ActivityCheckScheduled = false; - auto now = ctx.Now(); - - if (conn->Requests.empty() && - !conn->RequestsInflight && - conn->LastActivity < now - PipeInactivityTimeout) - { - LOG_INFO(ctx, TFileStoreComponents::TABLET_PROXY, - "Remove connection to tablet %lu for file system %s", - conn->TabletId, - conn->FileSystemId.c_str()); - ClientCache->Shutdown(ctx, conn->TabletId); - ConnectionById.erase(conn->Id); - ConnectionByTablet.erase(conn->TabletId); - Connections.erase(conn->FileSystemId); - } else { - if (conn->LastActivity >= now - PipeInactivityTimeout) { - auto timeEstimate = conn->LastActivity + PipeInactivityTimeout - now; - ctx.Schedule(timeEstimate, new TEvents::TEvWakeup(conn->Id)); - conn->ActivityCheckScheduled = true; - } - } -} - void TIndexTabletProxyActor::HandleClientConnected( TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) @@ -290,7 +230,7 @@ void TIndexTabletProxyActor::HandleClientConnected( msg->TabletId, FormatError(error).c_str()); - CancelActiveRequests(ctx, *conn); + CancelActiveRequests(*conn); DestroyConnection(ctx, *conn, error); return; } @@ -446,13 +386,6 @@ void TIndexTabletProxyActor::HandleResponse( auto* conn = ConnectionById[it->second.ConnectionId]; Y_ABORT_UNLESS(conn); - conn->LastActivity = ctx.Now(); - --conn->RequestsInflight; - if (conn->Requests.empty() && - !conn->RequestsInflight) { - ScheduleConnectionShutdown(ctx, *conn); - } - ctx.Send(event); ActiveRequests.erase(it); } @@ -461,13 +394,13 @@ void TIndexTabletProxyActor::HandlePoisonPill( const TEvents::TEvPoisonPill::TPtr& ev, const TActorContext& ctx) { - Y_UNUSED(ev); - for (const auto& conn: Connections) { ClientCache->Shutdown(ctx, conn.second.TabletId); } - Die(ctx); + LOG_ERROR(ctx, TFileStoreComponents::TABLET_PROXY, + "Someone tried to kill me: %s", + ev->Sender.ToString().c_str()); } bool TIndexTabletProxyActor::HandleRequests(STFUNC_SIG) @@ -534,7 +467,6 @@ bool TIndexTabletProxyActor::LogLateMessage(STFUNC_SIG) STFUNC(TIndexTabletProxyActor::StateWork) { switch (ev->GetTypeRewrite()) { - HFunc(TEvents::TEvWakeup, HandleWakeup); HFunc(TEvents::TEvPoisonPill, HandlePoisonPill); HFunc(TEvTabletPipe::TEvClientConnected, HandleClientConnected); diff --git a/cloud/filestore/libs/storage/tablet_proxy/tablet_proxy_actor.h b/cloud/filestore/libs/storage/tablet_proxy/tablet_proxy_actor.h index cb85e8ee63..f53346693f 100644 --- a/cloud/filestore/libs/storage/tablet_proxy/tablet_proxy_actor.h +++ b/cloud/filestore/libs/storage/tablet_proxy/tablet_proxy_actor.h @@ -52,9 +52,6 @@ class TIndexTabletProxyActor final TString Path; TDeque Requests; - TInstant LastActivity; - ui64 RequestsInflight = 0; - bool ActivityCheckScheduled = false; TConnection(ui64 id, TString fileSystemId) : Id(id) @@ -114,9 +111,7 @@ class TIndexTabletProxyActor final const NActors::TActorContext& ctx, TConnection& conn); - void CancelActiveRequests( - const NActors::TActorContext& ctx, - TConnection& conn); + void CancelActiveRequests(TConnection& conn); void PostponeRequest( const NActors::TActorContext& ctx, @@ -134,10 +129,6 @@ class TIndexTabletProxyActor final TConnection& conn); private: - void ScheduleConnectionShutdown( - const NActors::TActorContext& ctx, - TConnection& conn); - void HandleClientConnected( NKikimr::TEvTabletPipe::TEvClientConnected::TPtr& ev, const NActors::TActorContext& ctx); @@ -150,10 +141,6 @@ class TIndexTabletProxyActor final const TEvSSProxy::TEvDescribeFileStoreResponse::TPtr& ev, const NActors::TActorContext& ctx); - void HandleWakeup( - const NActors::TEvents::TEvWakeup::TPtr& ev, - const NActors::TActorContext& ctx); - void HandlePoisonPill( const NActors::TEvents::TEvPoisonPill::TPtr& ev, const NActors::TActorContext& ctx); diff --git a/cloud/filestore/libs/storage/tablet_proxy/tablet_proxy_ut.cpp b/cloud/filestore/libs/storage/tablet_proxy/tablet_proxy_ut.cpp index c47b3da33d..f3cc5b4264 100644 --- a/cloud/filestore/libs/storage/tablet_proxy/tablet_proxy_ut.cpp +++ b/cloud/filestore/libs/storage/tablet_proxy/tablet_proxy_ut.cpp @@ -91,6 +91,28 @@ Y_UNIT_TEST_SUITE(TIndexTabletProxyTest) return connections.empty(); }}); } + + Y_UNIT_TEST(ShouldNotDie) + { + TTestEnv env; + env.CreateSubDomain("nfs"); + + ui32 nodeIdx = env.CreateNode("nfs"); + + TSSProxyClient ssProxy( + env.GetStorageConfig(), + env.GetRuntime(), + nodeIdx); + ssProxy.CreateFileStore("test", 1000); + + TIndexTabletProxyClient tabletProxy(env.GetRuntime(), nodeIdx); + tabletProxy.WaitReady("test"); + tabletProxy.SendRequest( + MakeIndexTabletProxyServiceId(), + std::make_unique()); + env.GetRuntime().DispatchEvents({}, TDuration::MilliSeconds(100)); + tabletProxy.WaitReady("test"); + } } } // namespace NCloud::NFileStore::NStorage diff --git a/cloud/filestore/libs/storage/testlib/service_client.h b/cloud/filestore/libs/storage/testlib/service_client.h index 7493f845d1..3e0b90473c 100644 --- a/cloud/filestore/libs/storage/testlib/service_client.h +++ b/cloud/filestore/libs/storage/testlib/service_client.h @@ -252,10 +252,12 @@ class TServiceClient ui64 nodeId, const TString& name, ui32 flags, - const TString& followerId = "") + const TString& followerId = "", + const ui64 requestId = 0) { auto request = std::make_unique(); headers.Fill(request->Record); + request->Record.MutableHeaders()->SetRequestId(requestId); request->Record.SetFileSystemId(fileSystemId); request->Record.SetNodeId(nodeId); request->Record.SetName(name);