From bf267b3bf9e98f7dea84f2ac5748020057f9dbdf Mon Sep 17 00:00:00 2001
From: Luke Robison
Date: Fri, 20 Sep 2024 01:53:09 +0000
Subject: [PATCH] coll/han/alltoallv: Fix logic around waitany

The logic for waitany was flawed.  While we could wait for either a
send or a receive, we cannot consume receives in any order, and
likewise for sends.  Fix this by simply ping-ponging between waiting
for sends or receives and cycling when there is nothing to wait on.

Signed-off-by: Luke Robison
---
 ompi/mca/coll/han/coll_han_alltoallv.c | 42 ++++++++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/ompi/mca/coll/han/coll_han_alltoallv.c b/ompi/mca/coll/han/coll_han_alltoallv.c
index 7e548685a59..309b97a809f 100644
--- a/ompi/mca/coll/han/coll_han_alltoallv.c
+++ b/ompi/mca/coll/han/coll_han_alltoallv.c
@@ -251,10 +251,16 @@ static inline int alltoallv_sendrecv_w_direct_for_debugging(
         if (jloop < nreqs){
             jreq = jloop;
             have_completion = 0;
+            requests[jreq] = MPI_REQUEST_NULL;
         } else {
             have_completion = 1;
-            rc = ompi_request_wait_any( nreqs, requests, &jreq, MPI_STATUS_IGNORE );
+            jreq = jloop%nreqs;
+            if (requests[jreq] == MPI_REQUEST_NULL) {
+                continue;
+            }
+            rc = ompi_request_wait(&requests[jreq], MPI_STATUS_IGNORE);
             if (rc) break;
+            requests[jreq] = MPI_REQUEST_NULL;
         }
         int ii_send_req = jreq >= jfirst_sendreq;
         if (have_completion) {
@@ -264,7 +270,6 @@
             jrecvs_completed++;
         }
 
-        requests[jreq] = &ompi_request_null.request;
         if (ii_send_req && jsends_posted < ntypes_send) {
             rc = ompi_datatype_create_contiguous( 1, (ompi_datatype_t*)send_types[jsends_posted], &yuck_ompi_dtype_from_opal );
             if (rc) break;
@@ -348,7 +353,6 @@ static int alltoallv_sendrecv_w(
     int jreq;
     int jfirst_sendreq = nbufs/2 + nbufs%2;
     size_t recv_post_remaining_bytes;
-    int rc;
     size_t jloop = 0;
 
     size_t send_pack_bytes_remaining = 0;
@@ -408,6 +412,7 @@
      */
     jtype_send = -1;
     jtype_recv = -1;
+    int sequential_continues = 0;
     for (jloop=0; ; jloop++) {
         int ii_more_sends_to_post = jtype_send < ntypes_send || send_pack_bytes_remaining > 0;
         int ii_more_sends_to_complete = nsend_req_pending > 0;
@@ -424,24 +429,23 @@
 
         if (jloop >= nreqs) {
             /* Common Case: */
-            /* wait for any send or recv to complete */
-            rc = ompi_request_wait_any(nreqs, requests, &jreq, MPI_STATUS_IGNORE);
-            if (rc != 0) {
-                opal_output_verbose(1, mca_coll_han_component.han_output,
-                    "ompi_request_wait_any returned error code %d in alltoallv_sendrecv_w (loop=%ld)\n",rc,jloop);
-                return rc;
+            /* wait for the send or recv to complete */
+            jreq = jloop%nreqs;
+            if (requests[jreq] == MPI_REQUEST_NULL) {
+                if (++sequential_continues > nbufs) {
+                    opal_output_verbose(1, mca_coll_han_component.han_output,
+                        "ERROR: no active requests to wait on! Loop=%ld: %d %d %d %d\n",
+                        jloop,
+                        ii_more_sends_to_post, ii_more_sends_to_complete,
+                        ii_more_recvs_to_post, ii_more_recvs_to_complete );
+                    return MPI_ERR_INTERN;
+                }
+                continue;
             }
+            sequential_continues = 0;
+            ompi_request_wait( &requests[jreq], MPI_STATUS_IGNORE );
             have_completion = 1;
-            if (jreq == MPI_UNDEFINED) {
-                opal_output_verbose(1, mca_coll_han_component.han_output,
-                    "ERROR: no active requests to wait on! Loop=%ld: %d %d %d %d\n",
-                    jloop,
-                    ii_more_sends_to_post, ii_more_sends_to_complete,
-                    ii_more_recvs_to_post, ii_more_recvs_to_complete );
-                have_completion = 0;
-                jreq = jloop % nbufs;
-                return MPI_ERR_INTERN;
-            }
+            requests[jreq] = MPI_REQUEST_NULL;
         } else {
             /* priming the loop: post sends or recvs while have_completion=0.