From 5b54ceaa402858e1d43823864c59e57946c4ad56 Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Thu, 27 Jul 2023 14:04:10 -0700 Subject: [PATCH] Fixes request waiting for UCX DTL --- src/dtl/ucx_dtl.c | 16 ++++++++++------ src/modules/dyad.c | 4 ++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/dtl/ucx_dtl.c b/src/dtl/ucx_dtl.c index 54bcb1ad..77bf25d1 100644 --- a/src/dtl/ucx_dtl.c +++ b/src/dtl/ucx_dtl.c @@ -77,12 +77,12 @@ static ucs_status_t dyad_ucx_request_wait(dyad_dtl_ucx_t *dtl_handle, // that minimize the size of the worker's event queue. // In other words, prior UCX calls should mean that this loop only runs // a couple of times at most. - while (request->completed != 1) - { + do { ucp_worker_progress(dtl_handle->ucx_worker); - } - // Get the final status of the communication operation - final_request_status = ucp_request_check_status(request); + // usleep(100); + // Get the final status of the communication operation + final_request_status = ucp_request_check_status(request); + } while (final_request_status == UCS_INPROGRESS); // Free and deallocate the request object ucp_request_free(request); return final_request_status; @@ -350,6 +350,10 @@ dyad_rc_t dyad_dtl_ucx_rpc_unpack (dyad_dtl_t* self, const flux_msg_t* msg, char FLUX_LOG_INFO (dtl_handle->h, "Decoding consumer UCP address using base64\n"); dtl_handle->addr_len = base64_decoded_length(enc_addr_len); dtl_handle->consumer_address = (ucp_address_t*) malloc(dtl_handle->addr_len); + if (dtl_handle->consumer_address == NULL) { + FLUX_LOG_ERR (dtl_handle->h, "Could not allocate memory for consumer address"); + return DYAD_RC_SYSFAIL; + } decoded_len = base64_decode_using_maps (&base64_maps_rfc4648, (char*)dtl_handle->consumer_address, dtl_handle->addr_len, enc_addr, enc_addr_len); @@ -692,4 +696,4 @@ dyad_rc_t dyad_dtl_ucx_finalize(dyad_dtl_t **self) rc = dyad_dtl_ucx_finalize_impl (&dtl_handle); (*self)->private.ucx_dtl_handle = NULL; return rc; -} \ No newline at end of file +} diff --git a/src/modules/dyad.c b/src/modules/dyad.c index 628401da..5a8db126 100644 --- a/src/modules/dyad.c +++ b/src/modules/dyad.c @@ -85,6 +85,10 @@ static dyad_mod_ctx_t *getctx (flux_t *h) if (!ctx) { ctx = (dyad_mod_ctx_t *) malloc (sizeof (*ctx)); + if (ctx == NULL) { + FLUX_LOG_ERR (h, "DYAD_MOD: could not allocate memory for context"); + goto getctx_error; + } ctx->h = h; ctx->debug = false; ctx->handlers = NULL;