From 2a3780cecb746e94928ea4d9f49f7a8df75d85f1 Mon Sep 17 00:00:00 2001 From: ilumsden Date: Mon, 27 Feb 2023 10:32:11 -0500 Subject: [PATCH 01/18] Creates Data Transport Layer (DTL) sublibraries to Core and Module These sublibraries provide a common interface to sending/receiving data using different tools. The tools currently supported in the DTL sublibraries are: * Flux RPC (i.e., how DYAD has previously moved data) * UCX To control which tool is used, users can set the DYAD_DTL_MODE environment variable for the APIs. For the module/service, users specify which tool to use by passing a second argument on the command line. For both APIs and the module/service, the default DTL mode is Flux RPC. --- configure.ac | 41 ++- src/core/Makefile.am | 28 +- src/core/dtl/dyad_dtl.c | 158 ++++++++++ src/core/dtl/dyad_dtl.h | 36 +++ src/core/dtl/flux_dtl.c | 87 ++++++ src/core/dtl/flux_dtl.h | 41 +++ src/core/dtl/ucx_dtl.c | 497 ++++++++++++++++++++++++++++++ src/core/dtl/ucx_dtl.h | 47 +++ src/core/dyad_core.c | 334 +++++++++++++++++--- src/core/dyad_core.h | 16 +- src/core/dyad_dtl_defs.h | 24 ++ src/core/dyad_envs.h | 1 + src/core/dyad_rc.h | 28 +- src/modules/Makefile-urpc | 40 --- src/modules/Makefile.am | 32 +- src/modules/dtl/dyad_mod_dtl.c | 169 ++++++++++ src/modules/dtl/dyad_mod_dtl.h | 42 +++ src/modules/dtl/flux_mod_dtl.c | 82 +++++ src/modules/dtl/flux_mod_dtl.h | 38 +++ src/modules/dtl/ucx_mod_dtl.c | 347 +++++++++++++++++++++ src/modules/dtl/ucx_mod_dtl.h | 43 +++ src/modules/dyad.c | 221 ++++++++----- src/modules/dyad_ctx.h | 26 -- src/modules/dyad_flux_log.h | 22 ++ src/stream/Makefile.am | 4 +- src/stream/dyad_params.hpp | 4 + src/stream/dyad_stream_core.cpp | 54 +--- src/utils/Makefile.am | 10 +- src/utils/base64/Makefile.am | 3 + src/utils/base64/base64.c | 253 +++++++++++++++ src/utils/base64/base64.h | 241 +++++++++++++++ src/utils/base64/license | 17 + src/{modules => utils}/read_all.c | 0 src/{modules => utils}/read_all.h | 9 +- src/utils/utils.c | 10 + 
src/wrapper/Makefile.am | 8 +- src/wrapper/wrapper.c | 63 +--- 37 files changed, 2741 insertions(+), 335 deletions(-) create mode 100644 src/core/dtl/dyad_dtl.c create mode 100644 src/core/dtl/dyad_dtl.h create mode 100644 src/core/dtl/flux_dtl.c create mode 100644 src/core/dtl/flux_dtl.h create mode 100644 src/core/dtl/ucx_dtl.c create mode 100644 src/core/dtl/ucx_dtl.h create mode 100644 src/core/dyad_dtl_defs.h delete mode 100644 src/modules/Makefile-urpc create mode 100644 src/modules/dtl/dyad_mod_dtl.c create mode 100644 src/modules/dtl/dyad_mod_dtl.h create mode 100644 src/modules/dtl/flux_mod_dtl.c create mode 100644 src/modules/dtl/flux_mod_dtl.h create mode 100644 src/modules/dtl/ucx_mod_dtl.c create mode 100644 src/modules/dtl/ucx_mod_dtl.h delete mode 100644 src/modules/dyad_ctx.h create mode 100644 src/modules/dyad_flux_log.h create mode 100644 src/utils/base64/Makefile.am create mode 100644 src/utils/base64/base64.c create mode 100644 src/utils/base64/base64.h create mode 100644 src/utils/base64/license rename src/{modules => utils}/read_all.c (100%) rename src/{modules => utils}/read_all.h (82%) diff --git a/configure.ac b/configure.ac index a81029d5..ff988bc7 100644 --- a/configure.ac +++ b/configure.ac @@ -75,15 +75,47 @@ AC_ARG_ENABLE([perfflow], # TODO Add support for libb64 back once base64 encoding/decoding is fully complete # AC_ARG_VAR([LIBB64_DIR], [root directory for libb64]) +############################################# +# Define PKG_CHECK_VAR if it does not exist # +############################################# + +# Macro is copied from https://github.com/pkgconf/pkgconf/blob/master/pkg.m4 +# with minor modifications to change comments from using 'dnl' to '#' +m4_ifndef([PKG_CHECK_VAR], [ +# PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE, +# [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +# ------------------------------------------- +# Since: 0.28 +# +# Retrieves the value of the pkg-config variable for the given module. 
+AC_DEFUN([PKG_CHECK_VAR], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl +_PKG_CONFIG([$1], [variable="][$3]["], [$2]) +AS_VAR_COPY([$1], [pkg_cv_][$1]) +AS_VAR_IF([$1], [""], [$5], [$4])dnl +])# End of PKG_CHECK_VAR +]) + ######################## # Checks for libraries # ######################## # Check for the dl library (specifically, the "dlsym" function) AC_CHECK_LIB([dl], [dlsym]) -# FIXME: Replace 'main' with a function in '-ldyad_fstream': -# AC_CHECK_LIB([dyad_fstream], [main]) -# FIXME: Replace 'main' with a function in '-ltap': -# AC_CHECK_LIB([tap], [main]) +# Check for UCX v1.6.0 or higher (Required) +# TODO: make UCX optional based on a new AC_ARG_ENABLE flag +PKG_CHECK_MODULES([UCX], + [ucx >= 1.6.0] +) +PKG_CHECK_VAR([UCX_LIBDIR], + [ucx >= 1.6.0], + [libdir], + [], + [AC_MSG_FAILURE([Could not find libdir for UCX])] +) +AS_IF([test "x$UCX_LIBDIR" = "x"], + [AC_MSG_FAILURE([check_var succeeded, but value is incorrect])] +) # Find and get info for Flux-Core using pkg-config AX_FLUX_CORE PKG_CHECK_MODULES([JANSSON], @@ -162,6 +194,7 @@ fi AC_CONFIG_FILES([Makefile src/Makefile src/utils/Makefile + src/utils/base64/Makefile src/utils/libtap/Makefile src/core/Makefile src/stream/Makefile diff --git a/src/core/Makefile.am b/src/core/Makefile.am index 1c0a8916..ceabfe91 100644 --- a/src/core/Makefile.am +++ b/src/core/Makefile.am @@ -1,11 +1,29 @@ lib_LTLIBRARIES = libdyad_core.la -libdyad_core_la_SOURCES = dyad_core.c -libdyad_core_la_LIBADD = $(top_builddir)/src/utils/libutils.la $(top_builddir)/src/utils/libmurmur3.la $(FLUX_CORE_LIBS) -libdyad_core_la_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/utils $(FLUX_CORE_CFLAGS) -libdyad_core_la_LDFLAGS = -export-symbols dyad_core.sym $(AM_LDFLAGS) +libdyad_core_la_SOURCES = \ + dtl/dyad_dtl.c \ + dtl/dyad_dtl.h \ + dtl/flux_dtl.c \ + dtl/flux_dtl.h \ + dtl/ucx_dtl.c \ + dtl/ucx_dtl.h \ + dyad_core.c +libdyad_core_la_LIBADD = \ + 
$(top_builddir)/src/utils/libutils.la \ + $(top_builddir)/src/utils/libmurmur3.la \ + $(UCX_LIBS) \ + $(JANSSON_LIBS) \ + $(FLUX_CORE_LIBS) +libdyad_core_la_CPPFLAGS = \ + $(AM_CPPFLAGS) \ + -I$(top_srcdir)/src/utils \ + -I$(top_srcdir)/src/utils/base64 \ + $(UCX_CFLAGS) \ + $(JANSSON_CFLAGS) \ + $(FLUX_CORE_CFLAGS) +libdyad_core_la_LDFLAGS = -export-symbols dyad_core.sym -Wl,-rpath,'$(UCX_LIBDIR)' $(AM_LDFLAGS) if PERFFLOW libdyad_core_la_LIBADD += $(PERFFLOW_LIBS) libdyad_core_la_CPPFLAGS += $(PERFFLOW_PLUGIN_CPPFLAGS) $(PERFFLOW_CFLAGS) -DDYAD_PERFFLOW=1 endif -include_HEADERS = dyad_core.h dyad_envs.h dyad_rc.h dyad_flux_log.h +include_HEADERS = dyad_core.h dyad_envs.h dyad_rc.h dyad_flux_log.h dyad_dtl_defs.h
diff --git a/src/core/dtl/dyad_dtl.c b/src/core/dtl/dyad_dtl.c
new file mode 100644
index 00000000..e9d110bc
--- /dev/null
+++ b/src/core/dtl/dyad_dtl.c
@@ -0,0 +1,185 @@
+#include "dyad_dtl.h"
+
+#include "dyad_dtl_defs.h"
+#include "ucx_dtl.h"
+#include "flux_dtl.h"
+
+// Actual definition of dyad_dtl_t: records which backend ("mode") is in
+// use and holds an opaque pointer to that backend's own handle
+// (a dyad_dtl_ucx_t or a dyad_dtl_flux_t).
+struct dyad_dtl {
+    flux_t* h;
+    dyad_dtl_mode_t mode;
+    void *real_dtl_handle;
+};
+
+// Allocates the generic DTL handle and initializes the backend selected by
+// 'mode'. If the wrapper cannot be allocated, *dtl_handle is NULL and
+// DYAD_RC_SYSFAIL is returned. On any other failure the wrapper stays
+// allocated, holding whatever handle the backend produced (possibly NULL),
+// so the caller can still hand it to dyad_dtl_finalize.
+dyad_rc_t dyad_dtl_init(dyad_dtl_mode_t mode, flux_t *h,
+                        const char *kvs_namespace, bool debug,
+                        dyad_dtl_t **dtl_handle)
+{
+    dyad_rc_t rc = DYAD_RC_OK;
+
+    *dtl_handle = malloc(sizeof(struct dyad_dtl));
+    if (*dtl_handle == NULL)
+    {
+        FLUX_LOG_ERR (h, "Could not allocate a dyad_dtl_t object\n");
+        return DYAD_RC_SYSFAIL;
+    }
+    (*dtl_handle)->mode = mode;
+    (*dtl_handle)->h = h;
+    // dyad_dtl_finalize reads this field for every mode, so it must never
+    // be left indeterminate (e.g., when 'mode' is invalid).
+    (*dtl_handle)->real_dtl_handle = NULL;
+    if (mode == DYAD_DTL_UCX) {
+        // Initialize through a correctly typed pointer rather than casting
+        // the address of the void* field to dyad_dtl_ucx_t**.
+        dyad_dtl_ucx_t *ucx_handle = NULL;
+        FLUX_LOG_INFO (h, "Initializing UCX DTL\n");
+        rc = dyad_dtl_ucx_init (h, kvs_namespace, debug, &ucx_handle);
+        (*dtl_handle)->real_dtl_handle = ucx_handle;
+        return rc;
+    }
+    if (mode == DYAD_DTL_FLUX_RPC)
+    {
+        dyad_dtl_flux_t *flux_handle = NULL;
+        FLUX_LOG_INFO (h, "Initializing Flux RPC DTL\n");
+        rc = dyad_dtl_flux_init (h, kvs_namespace, debug, &flux_handle);
+        (*dtl_handle)->real_dtl_handle = flux_handle;
+        return rc;
+    }
+    FLUX_LOG_ERR (h, "Invalid DYAD DTL Mode: %d\n", (int) mode);
+    return DYAD_RC_BADDTLMODE;
+}
+
+// Packs, via the active backend, the JSON payload of the RPC into
+// *packed_obj. Returns DYAD_RC_BADDTLMODE if the stored mode is unknown.
+dyad_rc_t dyad_dtl_rpc_pack(dyad_dtl_t *dtl_handle, const char *upath,
+                            uint32_t producer_rank, json_t **packed_obj)
+{
+    if (dtl_handle->mode == DYAD_DTL_UCX) {
+        return dyad_dtl_ucx_rpc_pack(
+            (dyad_dtl_ucx_t*) dtl_handle->real_dtl_handle,
+            upath,
+            producer_rank,
+            packed_obj
+        );
+    }
+    if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) {
+        return dyad_dtl_flux_rpc_pack(
+            (dyad_dtl_flux_t*) dtl_handle->real_dtl_handle,
+            upath,
+            producer_rank,
+            packed_obj
+        );
+    }
+    FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL Mode: %d\n", (int) dtl_handle->mode);
+    return DYAD_RC_BADDTLMODE;
+}
+
+// Hands the future 'f' of the issued RPC to the active backend.
+dyad_rc_t dyad_dtl_recv_rpc_response(dyad_dtl_t* dtl_handle, flux_future_t *f)
+{
+    if (dtl_handle->mode == DYAD_DTL_UCX)
+    {
+        return dyad_dtl_ucx_recv_rpc_response(
+            (dyad_dtl_ucx_t*) dtl_handle->real_dtl_handle,
+            f
+        );
+    }
+    if (dtl_handle->mode == DYAD_DTL_FLUX_RPC)
+    {
+        return dyad_dtl_flux_recv_rpc_response(
+            (dyad_dtl_flux_t*) dtl_handle->real_dtl_handle,
+            f
+        );
+    }
+    FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL Mode: %d\n", (int) dtl_handle->mode);
+    return DYAD_RC_BADDTLMODE;
+}
+
+// Forwards connection setup to the active backend.
+dyad_rc_t dyad_dtl_establish_connection(dyad_dtl_t *dtl_handle) {
+    if (dtl_handle->mode == DYAD_DTL_UCX) {
+        return dyad_dtl_ucx_establish_connection(
+            (dyad_dtl_ucx_t*) dtl_handle->real_dtl_handle
+        );
+    }
+    if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) {
+        return dyad_dtl_flux_establish_connection(
+            (dyad_dtl_flux_t*) dtl_handle->real_dtl_handle
+        );
+    }
+    FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL Mode: %d\n", (int) dtl_handle->mode);
+    return DYAD_RC_BADDTLMODE;
+}
+
+// Receives the data through the active backend. On success, *buf is a
+// malloc'ed buffer of *buflen bytes that the caller must free.
+dyad_rc_t dyad_dtl_recv(dyad_dtl_t *dtl_handle,
+                        void **buf, size_t *buflen)
+{
+    if (dtl_handle->mode == DYAD_DTL_UCX) {
+        return dyad_dtl_ucx_recv(
+            (dyad_dtl_ucx_t*) dtl_handle->real_dtl_handle,
+            buf,
+            buflen
+        );
+    }
+    if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) {
+        return dyad_dtl_flux_recv(
+            (dyad_dtl_flux_t*) dtl_handle->real_dtl_handle,
+            buf,
+            buflen
+        );
+    }
+    FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL Mode: %d\n", (int) dtl_handle->mode);
+    return DYAD_RC_BADDTLMODE;
+}
+
+// Forwards connection teardown to the active backend.
+dyad_rc_t dyad_dtl_close_connection(dyad_dtl_t *dtl_handle)
+{
+    if (dtl_handle->mode == DYAD_DTL_UCX) {
+        return dyad_dtl_ucx_close_connection(
+            (dyad_dtl_ucx_t*) dtl_handle->real_dtl_handle
+        );
+    }
+    if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) {
+        return dyad_dtl_flux_close_connection(
+            (dyad_dtl_flux_t*) dtl_handle->real_dtl_handle
+        );
+    }
+    FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL Mode: %d\n", (int) dtl_handle->mode);
+    return DYAD_RC_BADDTLMODE;
+}
+
+// Frees the wrapper, sets *dtl_handle to NULL, and then finalizes the
+// backend handle. A NULL dtl_handle or a NULL *dtl_handle is a no-op.
+dyad_rc_t dyad_dtl_finalize(dyad_dtl_t **dtl_handle)
+{
+    if (dtl_handle == NULL || *dtl_handle == NULL) {
+        return DYAD_RC_OK;
+    }
+    flux_t* h = (*dtl_handle)->h;
+    dyad_dtl_mode_t mode = (*dtl_handle)->mode;
+    void *real_dtl_handle = (*dtl_handle)->real_dtl_handle;
+    free(*dtl_handle);
+    *dtl_handle = NULL;
+    if (mode == DYAD_DTL_UCX) {
+        return dyad_dtl_ucx_finalize(
+            (dyad_dtl_ucx_t*) real_dtl_handle
+        );
+    }
+    if (mode == DYAD_DTL_FLUX_RPC) {
+        return dyad_dtl_flux_finalize(
+            (dyad_dtl_flux_t*) real_dtl_handle
+        );
+    }
+    FLUX_LOG_ERR (h, "Invalid DYAD DTL Mode: %d\n", (int) mode);
+    return DYAD_RC_BADDTLMODE;
+}
diff --git a/src/core/dtl/dyad_dtl.h b/src/core/dtl/dyad_dtl.h new file mode 100644 index 00000000..eed5cd73 --- /dev/null +++ b/src/core/dtl/dyad_dtl.h @@ -0,0 +1,36 @@ +#ifndef __DYAD_DTL_H__ +#define __DYAD_DTL_H__ + +#include "dyad_dtl_defs.h" +#include "dyad_rc.h" + +#include +#include + +#ifdef __cplusplus +#include +#else +#include +#include +#endif + +dyad_rc_t dyad_dtl_init(dyad_dtl_mode_t mode, flux_t *h, + const char *kvs_namespace, bool debug, + dyad_dtl_t **dtl_handle); + +dyad_rc_t dyad_dtl_rpc_pack(dyad_dtl_t *dtl_handle, + const char *upath, uint32_t producer_rank, json_t **packed_obj); + +dyad_rc_t dyad_dtl_recv_rpc_response(dyad_dtl_t *dtl_handle, + flux_future_t *f); + +dyad_rc_t dyad_dtl_establish_connection(dyad_dtl_t *dtl_handle); + +dyad_rc_t dyad_dtl_recv(dyad_dtl_t *dtl_handle, + void **buf, size_t 
*buflen); + +dyad_rc_t dyad_dtl_close_connection(dyad_dtl_t *dtl_handle); + +dyad_rc_t dyad_dtl_finalize(dyad_dtl_t **dtl_handle); + +#endif /* __DYAD_DTL_H__ */
diff --git a/src/core/dtl/flux_dtl.c b/src/core/dtl/flux_dtl.c
new file mode 100644
index 00000000..6fc827d1
--- /dev/null
+++ b/src/core/dtl/flux_dtl.c
@@ -0,0 +1,117 @@
+#include "flux_dtl.h"
+#include "dyad_rc.h"
+
+// Allocates the Flux RPC DTL handle. 'h' and 'kvs_namespace' are borrowed,
+// not copied or owned. 'debug' is unused by this backend.
+dyad_rc_t dyad_dtl_flux_init(flux_t *h, const char *kvs_namespace,
+                             bool debug, dyad_dtl_flux_t **dtl_handle)
+{
+    *dtl_handle = malloc(sizeof(struct dyad_dtl_flux));
+    if (*dtl_handle == NULL)
+    {
+        FLUX_LOG_ERR (h, "Cannot allocate the DTL handle for Flux\n");
+        return DYAD_RC_SYSFAIL;
+    }
+    (*dtl_handle)->h = h;
+    (*dtl_handle)->kvs_namespace = kvs_namespace;
+    (*dtl_handle)->f = NULL;
+    return DYAD_RC_OK;
+}
+
+// Packs the RPC payload for this backend: only the file path is packed,
+// so 'producer_rank' is unused.
+dyad_rc_t dyad_dtl_flux_rpc_pack(dyad_dtl_flux_t *dtl_handle, const char *upath,
+                                 uint32_t producer_rank, json_t **packed_obj)
+{
+    *packed_obj = json_pack(
+        "{s:s}",
+        "upath",
+        upath
+    );
+    if (*packed_obj == NULL)
+    {
+        FLUX_LOG_ERR (dtl_handle->h, "Could not pack upath for Flux DTL\n");
+        return DYAD_RC_BADPACK;
+    }
+    return DYAD_RC_OK;
+}
+
+// Remembers the RPC future; dyad_dtl_flux_recv reads the file data from it.
+dyad_rc_t dyad_dtl_flux_recv_rpc_response(dyad_dtl_flux_t *dtl_handle,
+                                          flux_future_t *f)
+{
+    dtl_handle->f = f;
+    return DYAD_RC_OK;
+}
+
+// Nothing to establish: the data travels in the RPC response itself.
+dyad_rc_t dyad_dtl_flux_establish_connection(dyad_dtl_flux_t *dtl_handle)
+{
+    return DYAD_RC_OK;
+}
+
+// Copies the next raw RPC response into a freshly malloc'ed buffer.
+//
+// On DYAD_RC_OK, *buf holds *buflen bytes and must be freed by the caller
+// (*buf may be NULL when *buflen is 0). Returns DYAD_RC_RPC_FINISHED when
+// the RPC fails with ENODATA, DYAD_RC_BADRPC on any other RPC failure, and
+// DYAD_RC_SYSFAIL if the buffer cannot be allocated.
+dyad_rc_t dyad_dtl_flux_recv(dyad_dtl_flux_t *dtl_handle,
+                             void **buf, size_t *buflen)
+{
+    int rc = 0;
+    int rpc_errno = 0;
+    const void* tmp_buf = NULL;
+    size_t tmp_buflen = 0;
+    errno = 0;
+    FLUX_LOG_INFO (dtl_handle->h, "Get file contents from module using RPC\n");
+    // NOTE(review): the length is written through an int* into a zeroed
+    // size_t, as in the original code. That only yields the right value
+    // on little-endian targets -- confirm against the signature of
+    // flux_rpc_get_raw in the flux-core version being built against.
+    rc = flux_rpc_get_raw(dtl_handle->f, &tmp_buf, (int*) &tmp_buflen);
+    if (rc < 0)
+    {
+        // Save errno before logging, which is free to overwrite it
+        rpc_errno = errno;
+        FLUX_LOG_ERR (dtl_handle->h, "Could not get file data from Flux RPC\n");
+        if (rpc_errno == ENODATA)
+            return DYAD_RC_RPC_FINISHED;
+        return DYAD_RC_BADRPC;
+    }
+    *buflen = tmp_buflen;
+    *buf = malloc(*buflen);
+    if (*buflen > 0)
+    {
+        // Never copy into a failed allocation
+        if (*buf == NULL)
+        {
+            FLUX_LOG_ERR (dtl_handle->h, "Could not allocate memory for file\n");
+            return DYAD_RC_SYSFAIL;
+        }
+        memcpy(*buf, tmp_buf, *buflen);
+    }
+    flux_future_reset (dtl_handle->f);
+    return DYAD_RC_OK;
+}
+
+// Drops the borrowed future; it is not destroyed here.
+dyad_rc_t dyad_dtl_flux_close_connection(dyad_dtl_flux_t *dtl_handle)
+{
+    dtl_handle->f = NULL;
+    return DYAD_RC_OK;
+}
+
+// Frees the handle itself. The Flux handle, namespace string, and future
+// it pointed to are not released here. NULL is accepted.
+dyad_rc_t dyad_dtl_flux_finalize(dyad_dtl_flux_t *dtl_handle)
+{
+    if (dtl_handle != NULL)
+    {
+        dtl_handle->h = NULL;
+        dtl_handle->kvs_namespace = NULL;
+        dtl_handle->f = NULL;
+        free(dtl_handle);
+    }
+    return DYAD_RC_OK;
+}
diff --git a/src/core/dtl/flux_dtl.h b/src/core/dtl/flux_dtl.h new file mode 100644 index 00000000..a7e66609 --- /dev/null +++ b/src/core/dtl/flux_dtl.h @@ -0,0 +1,41 @@ +#ifndef __FLUX_DTL_H__ +#define __FLUX_DTL_H__ + +#include "dyad_flux_log.h" +#include "dyad_rc.h" + +#include + +#ifdef __cplusplus +#include +#else +#include +#endif + +struct dyad_dtl_flux { + flux_t *h; + const char *kvs_namespace; + flux_future_t *f; +}; + +typedef struct dyad_dtl_flux dyad_dtl_flux_t; + +dyad_rc_t dyad_dtl_flux_init(flux_t *h, const char *kvs_namespace, + bool debug, dyad_dtl_flux_t **dtl_handle); + +dyad_rc_t dyad_dtl_flux_rpc_pack(dyad_dtl_flux_t *dtl_handle, + const char *upath, uint32_t producer_rank, json_t **packed_obj); + +dyad_rc_t dyad_dtl_flux_recv_rpc_response(dyad_dtl_flux_t *dtl_handle, + flux_future_t *f); + +dyad_rc_t dyad_dtl_flux_establish_connection(dyad_dtl_flux_t *dtl_handle); + +dyad_rc_t dyad_dtl_flux_recv(dyad_dtl_flux_t *dtl_handle, + void **buf, size_t *buflen); + +dyad_rc_t dyad_dtl_flux_close_connection(dyad_dtl_flux_t *dtl_handle); + +dyad_rc_t dyad_dtl_flux_finalize(dyad_dtl_flux_t *dtl_handle); + +#endif /* __FLUX_DTL_H__ */ diff --git a/src/core/dtl/ucx_dtl.c b/src/core/dtl/ucx_dtl.c new file mode 100644 index 00000000..37b98bf8 --- /dev/null +++ b/src/core/dtl/ucx_dtl.c @@ -0,0 +1,497 @@ +#include "ucx_dtl.h" + +#include "dyad_rc.h" + +#include "base64.h" + +#ifdef __cplusplus +#include +#include +#include +#include +#else +#include +#include +#include +#include +#endif + +extern const base64_maps_t 
base64_maps_rfc4648;
+
+// Tag mask for UCX Tag send/recv
+#define DYAD_UCX_TAG_MASK UINT64_MAX
+
+// Macro function used to simplify checking the status of UCX operations.
+// Fully parenthesized so that it expands safely inside larger expressions.
+#define UCX_STATUS_FAIL(status) ((status) != UCS_OK)
+
+// Define a request struct to be used in handling
+// async UCX operations
+struct ucx_request {
+    int completed;
+};
+typedef struct ucx_request dyad_ucx_request_t;
+
+// Define a function that UCX will use to initialize our request struct
+// (registered through ucp_params_t.request_init in dyad_dtl_ucx_init)
+static void dyad_ucx_request_init(void *request)
+{
+    dyad_ucx_request_t *real_request = NULL;
+    real_request = (dyad_ucx_request_t*)request;
+    real_request->completed = 0;
+}
+
+// Define a function that the async receive (ucp_tag_msg_recv_nbx, or
+// ucp_tag_msg_recv_nb before UCX 1.10) will use to signal its completion
+#if UCP_API_VERSION >= UCP_VERSION(1, 10)
+static void dyad_recv_callback(void *request, ucs_status_t status,
+                               const ucp_tag_recv_info_t *tag_info, void *user_data)
+{
+    dyad_ucx_request_t *real_request = NULL;
+    real_request = (dyad_ucx_request_t*) request;
+    real_request->completed = 1;
+}
+#else
+static void dyad_recv_callback(void *request, ucs_status_t status,
+                               ucp_tag_recv_info_t *tag_info)
+{
+    dyad_ucx_request_t *real_request = NULL;
+    real_request = (dyad_ucx_request_t*) request;
+    real_request->completed = 1;
+}
+#endif
+
+// Simple function used to wait on the async receive
+static ucs_status_t dyad_ucx_request_wait(dyad_dtl_ucx_t *dtl_handle, dyad_ucx_request_t *request)
+{
+    ucs_status_t final_request_status = UCS_OK;
+    // If 'request' is actually a request handle, this means the communication operation
+    // is scheduled, but not yet completed.
+    if (UCS_PTR_IS_PTR(request))
+    {
+        // Spin lock until the request is completed
+        // The spin lock shouldn't be costly (performance-wise)
+        // because the wait should always come directly after other UCX calls
+        // that minimize the size of the worker's event queue.
+        // In other words, prior UCX calls should mean that this loop only runs
+        // a couple of times at most.
+        while (request->completed != 1)
+        {
+            ucp_worker_progress(dtl_handle->ucx_worker);
+        }
+        // Reset the flag before the request is recycled: UCX runs the
+        // request_init callback only when it first initializes this memory,
+        // so a reused request would otherwise look completed right away.
+        request->completed = 0;
+        // Get the final status of the communication operation
+        final_request_status = ucp_request_check_status(request);
+        // Free and deallocate the request object
+        ucp_request_free(request);
+        return final_request_status;
+    }
+    // If 'request' is actually a UCX error, this means the communication
+    // operation immediately failed. In that case, we simply grab the 'ucs_status_t'
+    // object for the error.
+    else if (UCS_PTR_IS_ERR(request))
+    {
+        return UCS_PTR_STATUS(request);
+    }
+    // If 'request' is neither a request handle nor an error, then
+    // the communication operation immediately completed successfully.
+    // So, we simply set the status to UCS_OK
+    return UCS_OK;
+}
+
+// Allocates the UCX DTL handle and creates the UCP context and worker
+// used to receive data, then records the worker's address so that it can
+// be sent to the producer.
+//
+// 'h' and 'kvs_namespace' are borrowed, not copied or owned. On failure,
+// everything created so far is released and *dtl_handle is set to NULL.
+dyad_rc_t dyad_dtl_ucx_init(flux_t *h, const char *kvs_namespace,
+                            bool debug, dyad_dtl_ucx_t **dtl_handle)
+{
+    ucp_params_t ucx_params;
+    ucp_worker_params_t worker_params;
+    ucp_config_t *config;
+    ucs_status_t status;
+    ucp_worker_attr_t worker_attrs;
+
+    printf ("Allocating UCX DTL handle\n");
+    *dtl_handle = malloc(sizeof(struct dyad_dtl_ucx));
+    if (*dtl_handle == NULL)
+    {
+        FLUX_LOG_ERR (h, "Could not allocate UCX DTL context\n");
+        return DYAD_RC_SYSFAIL;
+    }
+    // Allocation/Freeing of the Flux handle should be
+    // handled by the DYAD context
+    (*dtl_handle)->h = h;
+    // Allocation/Freeing of kvs_namespace should be
+    // handled by the DYAD context
+    (*dtl_handle)->kvs_namespace = kvs_namespace;
+    (*dtl_handle)->ucx_ctx = NULL;
+    (*dtl_handle)->ucx_worker = NULL;
+    (*dtl_handle)->consumer_address = NULL;
+    (*dtl_handle)->addr_len = 0;
+    (*dtl_handle)->comm_tag = 0;
+
+    // Read the UCX configuration
+    FLUX_LOG_INFO ((*dtl_handle)->h, "Reading UCP config\n");
+    printf ("Calling ucp_config_read\n");
+    status = ucp_config_read (NULL, NULL, &config);
+    if (UCX_STATUS_FAIL(status))
+    {
+        FLUX_LOG_ERR ((*dtl_handle)->h, "Could not read the UCX config\n");
+        goto error;
+    }
+
+    // Define the settings, parameters, features, etc.
+    // for the UCX context. UCX will use this info internally
+    // when creating workers, endpoints, etc.
+    //
+    // The settings enabled are:
+    //   * Tag-matching send/recv
+    //   * Remote Memory Access communication
+    //   * Auto initialization of request objects
+    //   * Worker sleep, wakeup, poll, etc. features
+    ucx_params.field_mask = UCP_PARAM_FIELD_FEATURES |
+                            UCP_PARAM_FIELD_REQUEST_SIZE |
+                            UCP_PARAM_FIELD_REQUEST_INIT;
+    ucx_params.features = UCP_FEATURE_TAG |
+                          UCP_FEATURE_RMA |
+                          UCP_FEATURE_WAKEUP;
+    ucx_params.request_size = sizeof(struct ucx_request);
+    ucx_params.request_init = dyad_ucx_request_init;
+
+    // Initialize UCX
+    FLUX_LOG_INFO ((*dtl_handle)->h, "Initializing UCP\n");
+    printf ("Calling ucp_init\n");
+    status = ucp_init(&ucx_params, config, &(*dtl_handle)->ucx_ctx);
+
+    // If in debug mode, print the configuration of UCX to stderr
+    if (debug)
+    {
+        printf ("Calling ucp_config_print\n");
+        ucp_config_print(
+            config,
+            stderr,
+            "UCX Configuration",
+            UCS_CONFIG_PRINT_CONFIG
+        );
+    }
+    // Release the config
+    printf ("Calling ucp_config_release\n");
+    ucp_config_release(config);
+    // Log an error if UCX initialization failed
+    if (UCX_STATUS_FAIL(status))
+    {
+        FLUX_LOG_ERR (h, "ucp_init failed (status = %d)\n", status);
+        goto error;
+    }
+
+    // Define the settings for the UCX worker (i.e., progress engine)
+    //
+    // The settings enabled are:
+    //   * Single-threaded mode (TODO look into multi-threading support)
+    //   * Restricting wakeup events to only include Tag-matching recv events
+    worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE |
+                               UCP_WORKER_PARAM_FIELD_EVENTS;
+    // TODO look into multi-threading support
+    worker_params.thread_mode = UCS_THREAD_MODE_SINGLE;
+    worker_params.events = UCP_WAKEUP_TAG_RECV;
+
+    // Create the worker and log an error if that fails
+    FLUX_LOG_INFO ((*dtl_handle)->h, "Creating UCP worker\n");
+    printf ("Calling ucp_worker_create\n");
+    status = ucp_worker_create(
+        (*dtl_handle)->ucx_ctx,
+        &worker_params,
+        &(*dtl_handle)->ucx_worker
+    );
+    if (UCX_STATUS_FAIL(status))
+    {
+        FLUX_LOG_ERR (h, "ucp_worker_create failed (status = %d)!\n", status);
+        goto error;
+    }
+
+    // Query the worker for its address
+    worker_attrs.field_mask = UCP_WORKER_ATTR_FIELD_ADDRESS;
+    FLUX_LOG_INFO ((*dtl_handle)->h, "Get address of UCP worker\n");
+    printf ("Calling ucp_worker_query\n");
+    status = ucp_worker_query(
+        (*dtl_handle)->ucx_worker,
+        &worker_attrs
+    );
+    if (UCX_STATUS_FAIL(status))
+    {
+        FLUX_LOG_ERR (h, "Cannot get UCX worker address (status = %d)!\n", status);
+        goto error;
+    }
+    (*dtl_handle)->consumer_address = worker_attrs.address;
+    (*dtl_handle)->addr_len = worker_attrs.address_length;
+
+    return DYAD_RC_OK;
+
+error:;
+    // If an error occurred, finalize the DTL handle and return a failing
+    // error code. finalize copes with a partially initialized handle. The
+    // caller's pointer is cleared so that it cannot be finalized twice.
+    dyad_dtl_ucx_finalize(*dtl_handle);
+    *dtl_handle = NULL;
+    return DYAD_RC_UCXINIT_FAIL;
+}
+
+// Builds the JSON payload of the RPC that asks the producer for 'upath':
+// the path, the producer and consumer ranks (which together form the UCX
+// tag stored in the handle), and this worker's address encoded in base64.
+dyad_rc_t dyad_dtl_ucx_rpc_pack(dyad_dtl_ucx_t *dtl_handle, const char *upath,
+                                uint32_t producer_rank, json_t **packed_obj)
+{
+    size_t enc_len = 0;
+    char* enc_buf = NULL;
+    ssize_t enc_size = 0;
+    if (dtl_handle->consumer_address == NULL)
+    {
+        FLUX_LOG_ERR (dtl_handle->h, "No UCX worker address is available to pack\n");
+        return DYAD_RC_BADPACK;
+    }
+    FLUX_LOG_INFO (dtl_handle->h, "Encode UCP address using base64\n");
+    enc_len = base64_encoded_length(dtl_handle->addr_len);
+    // Add 1 to encoded length because the encoded buffer will be
+    // packed as if it is a string
+    enc_buf = malloc(enc_len+1);
+    if (enc_buf == NULL)
+    {
+        FLUX_LOG_ERR (dtl_handle->h, "Could not allocate buffer for packed address\n");
+        return DYAD_RC_SYSFAIL;
+    }
+    // consumer_address is casted to const char* to avoid warnings
+    // This is valid because it is a pointer to an opaque struct,
+    // so the cast can be treated like a void*->char* cast.
+    enc_size = base64_encode_using_maps(&base64_maps_rfc4648,
+            enc_buf, enc_len+1,
+            (const char*)dtl_handle->consumer_address, dtl_handle->addr_len);
+    if (enc_size < 0)
+    {
+        FLUX_LOG_ERR (dtl_handle->h, "Could not base64-encode the UCX address\n");
+        free(enc_buf);
+        return DYAD_RC_BADPACK;
+    }
+    FLUX_LOG_INFO (dtl_handle->h, "Creating UCP tag for tag matching\n");
+    // Because we're using tag-matching send/recv for communication,
+    // there's no need to do any real connection establishment here.
+    // Instead, we use this function to create the tag that will be
+    // used for the upcoming communication.
+    uint32_t consumer_rank = 0;
+    if (flux_get_rank(dtl_handle->h, &consumer_rank) < 0)
+    {
+        FLUX_LOG_ERR (dtl_handle->h, "Cannot get consumer rank\n");
+        // Do not leak the encoded address on this early exit
+        free(enc_buf);
+        return DYAD_RC_FLUXFAIL;
+    }
+    // The tag is a 64 bit unsigned integer consisting of the
+    // 32-bit rank of the producer followed by the 32-bit rank
+    // of the consumer
+    dtl_handle->comm_tag = ((uint64_t)producer_rank << 32) | (uint64_t)consumer_rank;
+    // Use Jansson to pack the tag and UCX address into
+    // the payload to be sent via RPC to the producer plugin
+    FLUX_LOG_INFO (dtl_handle->h, "Packing RPC payload for UCX DTL\n");
+    *packed_obj = json_pack(
+        "{s:s, s:i, s:i, s:s%}",
+        "upath",
+        upath,
+        "tag_prod",
+        (int) producer_rank,
+        "tag_cons",
+        (int) consumer_rank,
+        "ucx_addr",
+        enc_buf, enc_len
+    );
+    free(enc_buf);
+    // If the packing failed, log an error
+    if (*packed_obj == NULL)
+    {
+        FLUX_LOG_ERR (dtl_handle->h, "Could not pack upath and UCX address for RPC\n");
+        return DYAD_RC_BADPACK;
+    }
+    return DYAD_RC_OK;
+}
+
+// Nothing to do: this backend does not use the RPC response itself.
+dyad_rc_t dyad_dtl_ucx_recv_rpc_response(dyad_dtl_ucx_t *dtl_handle,
+                                         flux_future_t *f)
+{
+    return DYAD_RC_OK;
+}
+
+// Nothing to do: tag-matching send/recv needs no explicit connection.
+dyad_rc_t dyad_dtl_ucx_establish_connection(dyad_dtl_ucx_t *dtl_handle)
+{
+    return DYAD_RC_OK;
+}
+
+// Receives the data sent by the producer on the tag computed by
+// dyad_dtl_ucx_rpc_pack, busy-polling the worker until it arrives.
+//
+// On DYAD_RC_OK, *buf is a malloc'ed buffer of *buflen bytes that the
+// caller must free. On failure, *buf is NULL.
+dyad_rc_t dyad_dtl_ucx_recv(dyad_dtl_ucx_t *dtl_handle,
+                            void **buf, size_t *buflen)
+{
+    ucs_status_t status;
+    ucp_tag_message_h msg = NULL;
+    ucp_tag_recv_info_t msg_info;
+    dyad_ucx_request_t* req = NULL;
+    // Progress the worker and probe it until the tag recv event
+    // that we're looking for comes in.
+    FLUX_LOG_INFO (dtl_handle->h, "Starting UCP polling for incoming data\n");
+    do {
+        FLUX_LOG_INFO (dtl_handle->h, "Progress UCP worker\n");
+        ucp_worker_progress (dtl_handle->ucx_worker);
+        FLUX_LOG_INFO (dtl_handle->h, "Probe the UCP worker for messages on tag %lu\n", (unsigned long) dtl_handle->comm_tag);
+        msg = ucp_tag_probe_nb(
+            dtl_handle->ucx_worker,
+            dtl_handle->comm_tag,
+            DYAD_UCX_TAG_MASK,
+            1, // Remove the message from UCP tracking
+               // Requires calling ucp_tag_msg_recv_nb
+               // with the ucp_tag_message_h to retrieve message
+            &msg_info
+        );
+    } while (msg == NULL);
+    // TODO: this loop spins (and logs) until the data arrives. An earlier
+    // draft, previously kept here as commented-out code, probed first and,
+    // when no message was pending and 'ucp_worker_progress' reported no
+    // other events, blocked in 'ucp_worker_wait' so that the OS could do
+    // other work in the meantime, returning DYAD_RC_UCXWAIT_FAIL if that
+    // wait failed. The worker is already created with UCP_FEATURE_WAKEUP
+    // and UCP_WAKEUP_TAG_RECV; revisit the blocking variant once it has
+    // been validated.
+    // The metadata retrieved from the probed tag recv event contains
+    // the size of the data to be sent.
+    // So, use that size to allocate a buffer
+    *buflen = msg_info.length;
+    *buf = malloc(*buflen);
+    // If allocation fails, log an error
+    // (malloc may legitimately return NULL for a zero-length message)
+    if (*buf == NULL && *buflen > 0)
+    {
+        // NOTE(review): 'msg' has already been removed from UCP tracking
+        // and is never received on this path -- confirm whether UCX needs
+        // it to be drained (e.g., into a scratch buffer) to avoid a leak.
+        FLUX_LOG_ERR (dtl_handle->h, "Could not allocate memory for file\n");
+        return DYAD_RC_SYSFAIL;
+    }
+    FLUX_LOG_INFO (dtl_handle->h, "Receive data using async UCX operation\n");
+#if UCP_API_VERSION >= UCP_VERSION(1, 10)
+    // Define the settings for the recv operation
+    //
+    // The settings enabled are:
+    //   * Define callback for the recv because it is async
+    //   * Restrict memory buffers to host-only since we aren't directly
+    //     dealing with GPU memory
+    ucp_request_param_t recv_params;
+    // TODO consider enabling UCP_OP_ATTR_FIELD_MEMH to speedup
+    // the recv operation if using RMA behind the scenes
+    recv_params.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK |
+                               UCP_OP_ATTR_FIELD_MEMORY_TYPE;
+    recv_params.cb.recv = dyad_recv_callback;
+    // Constraining to Host memory (as opposed to GPU memory)
+    // allows UCX to potentially perform some optimizations
+    recv_params.memory_type = UCS_MEMORY_TYPE_HOST;
+    // Perform the async recv operation using the probed tag recv event
+    req = ucp_tag_msg_recv_nbx(
+        dtl_handle->ucx_worker,
+        *buf,
+        *buflen,
+        msg,
+        &recv_params
+    );
+#else
+    req = ucp_tag_msg_recv_nb(
+        dtl_handle->ucx_worker,
+        *buf,
+        *buflen,
+        UCP_DATATYPE_CONTIG,
+        msg,
+        dyad_recv_callback
+    );
+#endif
+    // Wait on the recv operation to complete
+    FLUX_LOG_INFO (dtl_handle->h, "Wait for UCP recv operation to complete\n");
+    status = dyad_ucx_request_wait(dtl_handle, req);
+    // If the recv operation failed, log an error, free the data buffer,
+    // and set the buffer pointer to NULL
+    if (UCX_STATUS_FAIL(status))
+    {
+        FLUX_LOG_ERR (dtl_handle->h, "UCX recv failed!\n");
+        free(*buf);
+        *buf = NULL;
+        return DYAD_RC_UCXCOMM_FAIL;
+    }
+    FLUX_LOG_INFO (dtl_handle->h, "Data receive using UCX is successful\n");
+    FLUX_LOG_INFO (dtl_handle->h, "Received %zu bytes from producer\n", *buflen);
+    return DYAD_RC_OK;
+}
+
+dyad_rc_t dyad_dtl_ucx_close_connection(dyad_dtl_ucx_t *dtl_handle)
+{
+    // Since we're using tag send/recv, there's no need
+    // to explicitly close the connection. So, all we're
+    // doing here is setting the tag back to 0 (which cannot
+    // be valid for DYAD because DYAD won't send a file from
+    // one node to the same node).
+    dtl_handle->comm_tag = 0;
+    return DYAD_RC_OK;
+}
+
+// Releases everything the handle owns (worker address, worker, context)
+// and then the handle itself. The Flux handle and KVS namespace string
+// are borrowed, so they are left alone. NULL is accepted, as is a handle
+// whose UCX members are still NULL (e.g., after a failed init).
+dyad_rc_t dyad_dtl_ucx_finalize(dyad_dtl_ucx_t *dtl_handle)
+{
+    if (dtl_handle != NULL)
+    {
+        FLUX_LOG_INFO (dtl_handle->h, "Finalizing UCX DTL\n");
+        FLUX_LOG_INFO (dtl_handle->h, "Releasing KVS Namespace\n");
+        // KVS namespace string should be released by the
+        // DYAD context, so it is not released here
+        dtl_handle->kvs_namespace = NULL;
+        // Release consumer address if not already released
+        if (dtl_handle->consumer_address != NULL)
+        {
+            FLUX_LOG_INFO (dtl_handle->h, "Releasing worker address\n");
+            ucp_worker_release_address(
+                dtl_handle->ucx_worker,
+                dtl_handle->consumer_address
+            );
+            dtl_handle->consumer_address = NULL;
+        }
+        // Release worker if not already released
+        if (dtl_handle->ucx_worker != NULL)
+        {
+            FLUX_LOG_INFO (dtl_handle->h, "Releasing worker\n");
+            ucp_worker_destroy(dtl_handle->ucx_worker);
+            dtl_handle->ucx_worker = NULL;
+            FLUX_LOG_INFO (dtl_handle->h, "Worker released\n");
+        }
+        // Release context if not already released
+        if (dtl_handle->ucx_ctx != NULL)
+        {
+            FLUX_LOG_INFO (dtl_handle->h, "Releasing context\n");
+            ucp_cleanup(dtl_handle->ucx_ctx);
+            dtl_handle->ucx_ctx = NULL;
+        }
+        FLUX_LOG_INFO (dtl_handle->h, "Releasing Flux handle\n");
+        // Flux handle should be released by the
+        // DYAD context, so it is not released here
+        dtl_handle->h = NULL;
+        // Free the handle. This cannot reset the caller's own copy of the
+        // pointer, so the caller must not use or finalize it again.
+        free(dtl_handle);
+    }
+    return DYAD_RC_OK;
+}
diff --git a/src/core/dtl/ucx_dtl.h b/src/core/dtl/ucx_dtl.h new file mode 100644 index 00000000..27b504c9 --- /dev/null +++ b/src/core/dtl/ucx_dtl.h @@ -0,0 +1,47 @@ +#ifndef __UCX_DTL_H__ +#define __UCX_DTL_H__ + +#include "dyad_flux_log.h" +#include "dyad_rc.h" + +#include +#include + +#ifdef __cplusplus +#include +#else +#include +#endif + +struct dyad_dtl_ucx { + flux_t *h; + const char *kvs_namespace; + ucp_context_h ucx_ctx; + ucp_worker_h ucx_worker; + ucp_address_t *consumer_address; + size_t addr_len; + ucp_tag_t comm_tag; +}; + +typedef struct dyad_dtl_ucx dyad_dtl_ucx_t; + +dyad_rc_t dyad_dtl_ucx_init(flux_t *h, const char *kvs_namespace, + bool debug, dyad_dtl_ucx_t **dtl_handle); + +dyad_rc_t dyad_dtl_ucx_rpc_pack(dyad_dtl_ucx_t *dtl_handle, + const char *upath, uint32_t producer_rank, json_t **packed_obj); + +dyad_rc_t dyad_dtl_ucx_recv_rpc_response(dyad_dtl_ucx_t *dtl_handle, + flux_future_t *f); + +dyad_rc_t dyad_dtl_ucx_establish_connection(dyad_dtl_ucx_t *dtl_handle); + +dyad_rc_t dyad_dtl_ucx_recv(dyad_dtl_ucx_t *dtl_handle, + void **buf, size_t *buflen); + +dyad_rc_t dyad_dtl_ucx_close_connection( + dyad_dtl_ucx_t *dtl_handle); + +dyad_rc_t dyad_dtl_ucx_finalize(dyad_dtl_ucx_t *dtl_handle); + +#endif /* __UCX_DTL_H__ */ diff --git a/src/core/dyad_core.c b/src/core/dyad_core.c index 0db18ea7..d53ce570 100644 --- a/src/core/dyad_core.c +++ b/src/core/dyad_core.c @@ -2,7 +2,10 @@ #include #include "dyad_core.h" +#include "dyad_dtl_defs.h" #include "dyad_flux_log.h" +#include "dyad_rc.h" +#include "dtl/dyad_dtl.h" #include "murmur3.h" #include "utils.h" @@ -16,6 +19,7 @@ const struct dyad_ctx dyad_ctx_default = { NULL, 
// h + NULL, // dtl_handle false, // debug false, // check false, // reenter @@ -104,12 +108,10 @@ static inline dyad_rc_t publish_via_flux (const dyad_ctx_t* restrict ctx, #endif { dyad_rc_t rc = DYAD_RC_OK; - const char* prod_managed_path = NULL; flux_kvs_txn_t* txn = NULL; const size_t topic_len = PATH_MAX; char topic[PATH_MAX + 1]; memset (topic, 0, topic_len + 1); - prod_managed_path = ctx->prod_managed_path; memset (topic, '\0', topic_len + 1); // Generate the KVS key from the file path relative to // the producer-managed directory @@ -201,12 +203,14 @@ static inline dyad_rc_t dyad_kvs_lookup (const dyad_ctx_t* ctx, flux_future_t** f) #endif { + printf ("In dyad_kvs_lookup\n"); dyad_rc_t rc = DYAD_RC_OK; // Lookup information about the desired file (represented by kvs_topic) // from the Flux KVS. If there is no information, wait for it to be // made available DYAD_LOG_INFO (ctx, "Retrieving information from KVS under the key %s\n", kvs_topic); + printf ("Calling flux_kvs_lookup\n"); *f = flux_kvs_lookup (ctx->h, ctx->kvs_namespace, FLUX_KVS_WAITCREATE, kvs_topic); // If the KVS lookup failed, log an error and return DYAD_BADLOOKUP @@ -216,12 +220,14 @@ static inline dyad_rc_t dyad_kvs_lookup (const dyad_ctx_t* ctx, } // Extract the rank of the producer from the KVS response DYAD_LOG_INFO (ctx, "Retrieving owner rank from KVS entry\n"); + printf ("Unpacking producer rank from KVS\n"); rc = flux_kvs_lookup_get_unpack (*f, "i", owner_rank); // If the extraction did not work, log an error and return DYAD_BADFETCH if (rc < 0) { DYAD_LOG_ERR (ctx, "Could not unpack owner's rank from KVS response\n"); return DYAD_RC_BADFETCH; } + printf ("Finished with dyad_kvs_lookup\n"); return DYAD_RC_OK; } @@ -236,6 +242,7 @@ static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, dyad_kvs_response_t** restrict resp) #endif { + printf ("In dyad_fetch\n"); dyad_rc_t rc = DYAD_RC_OK; char upath[PATH_MAX]; uint32_t owner_rank = 0; @@ -247,10 +254,13 @@ static inline 
dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, // Extract the path to the file specified by fname relative to the // consumer-managed path // This relative path will be stored in upath + printf ("Checking if %s is in consumer path\n", fname); if (!cmp_canonical_path_prefix (ctx->cons_managed_path, fname, upath, PATH_MAX)) { + printf ("%s not in consumer path, so exiting\n"); DYAD_LOG_INFO (ctx, "%s is not in the Consumer's managed path\n", fname); + printf ("Done with dyad_fetch\n"); return DYAD_RC_OK; } DYAD_LOG_INFO (ctx, @@ -258,10 +268,12 @@ static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, upath); // Generate the KVS key from the file path relative to // the consumer-managed directory + printf ("Generate KVS key\n"); gen_path_key (upath, topic, topic_len, ctx->key_depth, ctx->key_bins); DYAD_LOG_INFO (ctx, "Generated KVS key for consumer: %s\n", topic); // Call dyad_kvs_lookup to retrieve infromation about the file // from the Flux KVS + printf ("Calling dyad_kvs_lookup\n"); rc = dyad_kvs_lookup (ctx, topic, &owner_rank, &f); // If an error occured in dyad_kvs_lookup, log it and propagate the return // code @@ -276,6 +288,7 @@ static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, // object, and return DYAD_OK. This will cause the file transfer step to be // skipped if (ctx->shared_storage || (owner_rank == ctx->rank)) { + printf ("Either shared_storage is enabled or we are on the producer node. 
So skip\n"); DYAD_LOG_INFO (ctx, "Either shared-storage is enabled or the producer rank " "is the " @@ -289,98 +302,201 @@ static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, // return code DYAD_LOG_INFO (ctx, "Creating KVS response object to store retrieved data\n"); + printf ("Allocating KVS response struct\n"); *resp = malloc (sizeof (struct dyad_kvs_response)); if (*resp == NULL) { DYAD_LOG_ERR (ctx, "Cannot allocate a dyad_kvs_response_t object!\n"); rc = DYAD_RC_BADRESPONSE; goto fetch_done; } + printf ("Allocating space for upath (%s)\n", upath); (*resp)->fpath = malloc (strlen (upath) + 1); + if ((*resp)->fpath == NULL) + { + DYAD_LOG_ERR (ctx, "Cannot allocate a buffer for the file path in the dyad_kvs_response_t object\n"); + free(*resp); + rc = DYAD_RC_BADRESPONSE; + goto fetch_done; + } + printf ("Filling KVS response\n"); strncpy ((*resp)->fpath, upath, strlen (upath) + 1); (*resp)->owner_rank = owner_rank; rc = DYAD_RC_OK; fetch_done:; // Destroy the Flux future if needed if (f != NULL) { + printf ("Destroying future\n"); flux_future_destroy (f); f = NULL; } + printf ("Finished with dyad_fetch\n"); return rc; } +// static inline dyad_rc_t process_remaining_rpc_msgs (const dyad_ctx_t* ctx, flux_future_t* f) +// { +// DYAD_LOG_INFO (ctx, "In process_remaining_rpc_msgs\n"); +// int rc = 0; +// while (true) { +// if ((rc = flux_rpc_get (f, NULL)) < 0) { +// DYAD_LOG_INFO(ctx, "flux_rpc_get returned < 0 (rc = %d)\n", rc); +// if (errno == ENODATA) { +// DYAD_LOG_INFO (ctx, "Reached end of RPC stream from module"); +// return DYAD_RC_OK; +// } else { +// DYAD_LOG_ERR (ctx, "An error occured in the DYAD module\n"); +// return DYAD_RC_BADRPC; +// } +// } +// DYAD_LOG_INFO(ctx, "flux_rpc_get returned >= 0 (rc = %d)\n", rc); +// flux_future_reset (f); +// } +// } + #if DYAD_PERFFLOW __attribute__ ((annotate ("@critical_path()"))) -static dyad_rc_t dyad_rpc_get ( +static dyad_rc_t dyad_get_data ( const dyad_ctx_t* ctx, const dyad_kvs_response_t* 
restrict kvs_data, const char** file_data, - int* file_len, - flux_future_t** f) + size_t* file_len) #else -static inline dyad_rc_t dyad_rpc_get ( +static inline dyad_rc_t dyad_get_data ( const dyad_ctx_t* ctx, const dyad_kvs_response_t* restrict kvs_data, const char** file_data, - int* file_len, - flux_future_t** f) + size_t* file_len) #endif { + printf ("In dyad_get_data\n"); dyad_rc_t rc = DYAD_RC_OK; - // Create and send an RPC payload to the producer's Flux broker. - // This payload will tell the broker to run the dyad.fetch function - // with the upath specified by the data fetched from the KVS - DYAD_LOG_INFO (ctx, "Sending RPC for data for %s from rank %u\n", - kvs_data->fpath, kvs_data->owner_rank); - *f = flux_rpc_pack (ctx->h, "dyad.fetch", kvs_data->owner_rank, 0, "{s:s}", - "upath", kvs_data->fpath); - // If the RPC dispatch failed, log an error and return DYAD_BADRPC - if (*f == NULL) { - DYAD_LOG_ERR (ctx, "Cannot send RPC to producer plugin!\n"); - return DYAD_RC_BADRPC; + dyad_rc_t final_rc = DYAD_RC_OK; + flux_future_t *f; + json_t* rpc_payload; + DYAD_LOG_INFO (ctx, "Packing payload for RPC to DYAD module"); + printf ("Calling dyad_dtl_rpc_pack\n"); + rc = dyad_dtl_rpc_pack ( + ctx->dtl_handle, + kvs_data->fpath, + kvs_data->owner_rank, + &rpc_payload + ); + if (DYAD_IS_ERROR(rc)) + { + DYAD_LOG_ERR(ctx, "Cannot create JSON payload for Flux RPC to DYAD module\n"); + goto get_done; + } + DYAD_LOG_INFO (ctx, "Sending payload for RPC to DYAD module"); + printf ("Calling flux_rpc_pack\n"); + f = flux_rpc_pack ( + ctx->h, + "dyad.fetch", + kvs_data->owner_rank, + FLUX_RPC_STREAMING, + "o", + rpc_payload + ); + if (f == NULL) + { + DYAD_LOG_ERR(ctx, "Cannot send RPC to producer module\n"); + rc = DYAD_RC_BADRPC; + goto get_done; + } + DYAD_LOG_INFO (ctx, "Receive RPC response from DYAD module"); + printf ("Calling dyad_dtl_recv_rpc_response\n"); + rc = dyad_dtl_recv_rpc_response(ctx->dtl_handle, f); + if (DYAD_IS_ERROR(rc)) + { + DYAD_LOG_ERR(ctx, 
"Cannot receive and/or parse the RPC response\n"); + goto get_done; + } + DYAD_LOG_INFO (ctx, "Establish DTL connection with DYAD module"); + printf ("Calling dyad_dtl_establish_connection\n"); + rc = dyad_dtl_establish_connection ( + ctx->dtl_handle + ); + if (DYAD_IS_ERROR(rc)) { + DYAD_LOG_ERR (ctx, "Cannot establish connection with DYAD module on broker %u\n", kvs_data->owner_rank); + goto get_done; + } + DYAD_LOG_INFO (ctx, "Receive file data via DTL"); + printf ("Calling dyad_dtl_recv\n"); + rc = dyad_dtl_recv ( + ctx->dtl_handle, + (void**) file_data, + file_len + ); + DYAD_LOG_INFO (ctx, "Close DTL connection with DYAD module"); + printf ("Calling dyad_dtl_close_connection\n"); + dyad_dtl_close_connection (ctx->dtl_handle); + if (DYAD_IS_ERROR(rc)) + { + DYAD_LOG_ERR (ctx, "Cannot receive data from producer module\n"); + goto get_done; } - DYAD_LOG_INFO (ctx, "Retrieving data from producer\n"); - // Extract the file data from the RPC response - rc = flux_rpc_get_raw (*f, (const void**)file_data, file_len); - // If the extraction failed, log an error and return DYAD_BADRPC - if (rc < 0) { - DYAD_LOG_ERR (ctx, "Cannot get file back from plugin via RPC!\n"); - return DYAD_RC_BADRPC; + + rc = DYAD_RC_OK; + +get_done:; + // There are two return codes that have special meaning when coming from the DTL: + // * DYAD_RC_RPC_FINISHED: occurs when an ENODATA error occurs + // * DYAD_RC_BADRPC: occurs when a previous RPC operation fails + // In either of these cases, we do not need to wait for the end of stream because + // the RPC is already completely messed up. + // If we do not have either of these cases, we will wait for one more RPC message. + // If everything went well in the module, this last message will set errno to ENODATA (i.e., end of stream). + // Otherwise, something went wrong, so we'll return DYAD_RC_BADRPC. 
+ if (rc != DYAD_RC_RPC_FINISHED && rc != DYAD_RC_BADRPC) { + printf ("Waiting for end-of-stream message from module\n"); + DYAD_LOG_INFO (ctx, "Wait for end-of-stream message from module\n"); + if (flux_rpc_get (f, NULL) < 0 && errno == ENODATA) { + printf ("Wait completed successfully\n"); + DYAD_LOG_ERR (ctx, "Received end-of-stream message (ENODATA) from module\n"); + } else { + DYAD_LOG_ERR (ctx, "An error occured at end of getting data! Either the module sent too many responses, or the module failed with a bad error (errno = %d)\n", errno); + printf ("Wait completed with error\n"); + rc = DYAD_RC_BADRPC; + } } - return DYAD_RC_OK; + DYAD_LOG_INFO (ctx, "Destroy the Flux future for the RPC\n"); + printf ("Destroying future\n"); + flux_future_destroy (f); + printf ("Finished with dyad_get_data\n"); + return rc; } #if DYAD_PERFFLOW __attribute__ ((annotate ("@critical_path()"))) static dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, - const char* restrict fname, const dyad_kvs_response_t* restrict kvs_data) #else static inline dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, - const char* restrict fname, const dyad_kvs_response_t* restrict kvs_data) #endif { + printf ("In dyad_pull\n"); dyad_rc_t rc = DYAD_RC_OK; const char* file_data = NULL; - int file_len = 0; + size_t file_len = 0; const char* odir = NULL; FILE* of = NULL; char file_path[PATH_MAX + 1]; char file_path_copy[PATH_MAX + 1]; mode_t m = (S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH | S_ISGID); size_t written_len = 0; - flux_future_t* f = NULL; memset (file_path, 0, PATH_MAX + 1); memset (file_path_copy, 0, PATH_MAX + 1); - // Call dyad_rpc_get to dispatch a RPC to the producer's Flux broker + // Call dyad_get_data to dispatch a RPC to the producer's Flux broker // and retrieve the data associated with the file - rc = dyad_rpc_get (ctx, kvs_data, &file_data, &file_len, &f); - if (rc != DYAD_RC_OK) { + printf ("Calling dyad_get_data\n"); + rc = dyad_get_data (ctx, kvs_data, &file_data, &file_len); 
+ if (DYAD_IS_ERROR(rc)) { goto pull_done; } // Build the full path to the file being consumed + printf ("Build the full path to the file\n"); strncpy (file_path, ctx->cons_managed_path, PATH_MAX - 1); concat_str (file_path, kvs_data->fpath, "/", PATH_MAX); strncpy (file_path_copy, file_path, PATH_MAX); // dirname modifies the arg @@ -388,6 +504,7 @@ static inline dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, DYAD_LOG_INFO (ctx, "Saving retrieved data to %s\n", file_path); // Create the directory as needed // TODO: Need to be consistent with the mode at the source + printf ("Creating directories as needed\n"); odir = dirname (file_path_copy); if ((strncmp (odir, ".", strlen (".")) != 0) && (mkdir_as_needed (odir, m) < 0)) { @@ -399,18 +516,21 @@ static inline dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, } // Write the file contents to the location specified by the user + printf ("Openning file (%s)\n", file_path); of = fopen (file_path, "w"); if (of == NULL) { DYAD_LOG_ERR (ctx, "Cannot open file %s\n", file_path); rc = DYAD_RC_BADFIO; goto pull_done; } + printf ("Saving file (%s)\n", file_path); written_len = fwrite (file_data, sizeof (char), (size_t)file_len, of); if (written_len != (size_t)file_len) { DYAD_LOG_ERR (ctx, "fwrite of pulled file failed!\n"); rc = DYAD_RC_BADFIO; goto pull_done; } + printf ("Closing file (%s)\n", file_path); rc = fclose (of); if (rc != 0) { @@ -420,15 +540,15 @@ static inline dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, rc = DYAD_RC_OK; pull_done: - // Destroy the Flux future for the RPC, if needed - if (f != NULL) { - flux_future_destroy (f); - f = NULL; + if (file_data != NULL) { + printf ("Freeing file data\n"); + free(file_data); } // If "check" is set and the operation was successful, set the // DYAD_CHECK_ENV environment variable to "ok" if (rc == DYAD_RC_OK && (ctx && ctx->check)) setenv (DYAD_CHECK_ENV, "ok", 1); + printf ("Finished with dyad_pull\n"); return rc; } @@ -440,8 +560,12 @@ dyad_rc_t 
dyad_init (bool debug, const char* kvs_namespace, const char* prod_managed_path, const char* cons_managed_path, + dyad_dtl_mode_t dtl_mode, dyad_ctx_t** ctx) { + printf ("Initializing DYAD!\n"); + dyad_rc_t rc = DYAD_RC_OK; + printf ("Checking if ctx is NULL\n"); // If ctx is NULL, we won't be able to return a dyad_ctx_t // to the user. In that case, print an error and return // immediately with DYAD_NOCTX. @@ -474,9 +598,11 @@ dyad_rc_t dyad_init (bool debug, } // Set the initial contents of the dyad_ctx_t object // to dyad_ctx_default. + printf ("Setting default values for ctx\n"); **ctx = dyad_ctx_default; // If neither managed path is provided, DYAD will not do anything. // So, simply print a warning and return DYAD_OK. + printf ("Checking prod path and cons path\n"); if (prod_managed_path == NULL && cons_managed_path == NULL) { fprintf (stderr, "Warning: no managed path provided! DYAD will not do " @@ -491,6 +617,7 @@ dyad_rc_t dyad_init (bool debug, (*ctx)->key_bins = key_bins; // Open a Flux handle and store it in the dyad_ctx_t // object. If the open operation failed, return DYAD_FLUXFAIL + printf ("Openning Flux\n"); (*ctx)->h = flux_open (NULL, 0); if ((*ctx)->h == NULL) { fprintf (stderr, "Could not open Flux handle!\n"); @@ -498,11 +625,15 @@ dyad_rc_t dyad_init (bool debug, } // Get the rank of the Flux broker corresponding // to the handle. 
If this fails, return DYAD_FLUXFAIL + FLUX_LOG_INFO ((*ctx)->h, "DYAD_CORE: getting Flux rank"); + printf ("Getting Flux rank\n"); if (flux_get_rank ((*ctx)->h, &((*ctx)->rank)) < 0) { FLUX_LOG_ERR ((*ctx)->h, "Could not get Flux rank!\n"); return DYAD_RC_FLUXFAIL; } // If the namespace is provided, copy it into the dyad_ctx_t object + FLUX_LOG_INFO ((*ctx)->h, "DYAD_CORE: saving KVS namespace"); + printf ("Setting KVS namespace\n"); if (kvs_namespace == NULL) { FLUX_LOG_ERR ((*ctx)->h, "No KVS namespace provided!\n"); // TODO see if we want a different return val @@ -518,8 +649,26 @@ dyad_rc_t dyad_init (bool debug, return DYAD_RC_NOCTX; } strncpy ((*ctx)->kvs_namespace, kvs_namespace, namespace_len + 1); + // Initialize the DTL based on the value of dtl_mode + // If an error occurs, log it and return an error + FLUX_LOG_INFO ((*ctx)->h, "DYAD_CORE: inintializing DYAD DTL"); + printf ("Initializing DYAD DTL\n"); + rc = dyad_dtl_init( + dtl_mode, + (*ctx)->h, + (*ctx)->kvs_namespace, + (*ctx)->debug, + &(*ctx)->dtl_handle + ); + if (DYAD_IS_ERROR(rc)) + { + FLUX_LOG_ERR ((*ctx)->h, "Cannot initialize the DTL\n"); + return rc; + } // If the producer-managed path is provided, copy it into // the dyad_ctx_t object + FLUX_LOG_INFO ((*ctx)->h, "DYAD_CORE: saving producer path"); + printf ("Copying producer path\n"); if (prod_managed_path == NULL) { (*ctx)->prod_managed_path = NULL; } else { @@ -539,6 +688,8 @@ dyad_rc_t dyad_init (bool debug, } // If the consumer-managed path is provided, copy it into // the dyad_ctx_t object + FLUX_LOG_INFO ((*ctx)->h, "DYAD_CORE: saving consumer path"); + printf ("Copying consumer path\n"); if (cons_managed_path == NULL) { (*ctx)->cons_managed_path = NULL; } else { @@ -562,9 +713,96 @@ dyad_rc_t dyad_init (bool debug, (*ctx)->reenter = true; (*ctx)->initialized = true; // TODO Print logging info + printf ("Done with dyad_init\n"); return DYAD_RC_OK; } +dyad_rc_t dyad_init_env (dyad_ctx_t **ctx) +{ + char *e = NULL; + bool debug = 
false; + bool check = false; + bool shared_storage = false; + unsigned int key_depth = 0; + unsigned int key_bins = 0; + char *kvs_namespace = NULL; + char *prod_managed_path = NULL; + char *cons_managed_path = NULL; + size_t dtl_mode_env_len = 0; + dyad_dtl_mode_t dtl_mode = DYAD_DTL_FLUX_RPC; + + printf("DYAD_CORE: Initializing with environment variables\n"); + + if ((e = getenv (DYAD_SYNC_DEBUG_ENV))) { + debug = true; + enable_debug_dyad_utils (); + } else { + debug = false; + disable_debug_dyad_utils (); + } + + if ((e = getenv (DYAD_SYNC_CHECK_ENV))) { + check = true; + } else { + check = false; + } + + if ((e = getenv (DYAD_SHARED_STORAGE_ENV))) { + shared_storage = true; + } else { + shared_storage = false; + } + + if ((e = getenv (DYAD_KEY_DEPTH_ENV))) { + key_depth = atoi (e); + } else { + key_depth = 3; + } + + if ((e = getenv (DYAD_KEY_BINS_ENV))) { + key_bins = atoi (e); + } else { + key_bins = 1024; + } + + if ((e = getenv (DYAD_KVS_NAMESPACE_ENV))) { + kvs_namespace = e; + } else { + kvs_namespace = NULL; + } + + if ((e = getenv (DYAD_PATH_CONSUMER_ENV))) { + cons_managed_path = e; + } else { + cons_managed_path = NULL; + } + + if ((e = getenv (DYAD_PATH_PRODUCER_ENV))) { + prod_managed_path = e; + } else { + prod_managed_path = NULL; + } + + if ((e = getenv (DYAD_DTL_MODE_ENV))) { + dtl_mode_env_len = strlen (e); + if (strncmp (e, "FLUX_RPC", dtl_mode_env_len) == 0) { + dtl_mode = DYAD_DTL_FLUX_RPC; + } else if (strncmp (e, "UCX", dtl_mode_env_len) == 0) { + dtl_mode = DYAD_DTL_UCX; + } else { + printf ("Invalid DTL mode provided through %s. \ + Defaulting the FLUX_RPC\n", DYAD_DTL_MODE_ENV); + dtl_mode = DYAD_DTL_FLUX_RPC; + } + } else { + dtl_mode = DYAD_DTL_FLUX_RPC; + } + printf ("DYAD_CORE: retrieved configuration from environment. 
Now initializing DYAD\n"); + return dyad_init (debug, check, shared_storage, key_depth, key_bins, + kvs_namespace, prod_managed_path, cons_managed_path, + dtl_mode, ctx); +} + dyad_rc_t dyad_produce (dyad_ctx_t* ctx, const char* fname) { // If the context is not defined, then it is not valid. @@ -585,23 +823,28 @@ dyad_rc_t dyad_produce (dyad_ctx_t* ctx, const char* fname) dyad_rc_t dyad_consume (dyad_ctx_t* ctx, const char* fname) { + printf ("Calling dyad_consume for %s (ctx is NULL: %d)\n", fname, (ctx == NULL)); int rc = 0; // If the context is not defined, then it is not valid. // So, return DYAD_NOCTX + printf ("Checking if ctx is NULL\n"); if (!ctx || !ctx->h) { return DYAD_RC_NOCTX; } // If the consumer-managed path is NULL or empty, then the context is not // valid for a consumer operation. So, return DYAD_BADMANAGEDPATH + printf ("Confirming that consumer path is set\n"); if (ctx->cons_managed_path == NULL || strlen (ctx->cons_managed_path) == 0) { return DYAD_RC_BADMANAGEDPATH; } // Set reenter to false to avoid recursively performing // DYAD operations + printf ("Setting reenter to false to avoid recursion\n"); ctx->reenter = false; // Call dyad_fetch to get (and possibly wait on) // data from the Flux KVS + printf ("Calling dyad_fetch\n"); dyad_kvs_response_t* resp = NULL; rc = dyad_fetch (ctx, fname, &resp); // If an error occured in dyad_fetch, log an error @@ -615,15 +858,18 @@ dyad_rc_t dyad_consume (dyad_ctx_t* ctx, const char* fname) // This will most likely happend because shared_storage // is enabled if (resp == NULL) { + printf ("KVS response is NULL, which means we should skip dyad_pull\n"); DYAD_LOG_INFO (ctx, "The KVS response is NULL! 
Skipping dyad_pull!\n"); rc = DYAD_RC_OK; goto consume_done; } // Call dyad_pull to fetch the data from the producer's // Flux broker - rc = dyad_pull (ctx, fname, resp); + printf ("Calling dyad_pull\n"); + rc = dyad_pull (ctx, resp); // Regardless if there was an error in dyad_pull, // free the KVS response object + printf ("Freeing KVS response\n"); if (resp != NULL) { free (resp->fpath); free (resp); @@ -644,27 +890,37 @@ consume_done:; int dyad_finalize (dyad_ctx_t** ctx) { + printf ("Finalizing DYAD\n"); if (ctx == NULL || *ctx == NULL) { return DYAD_RC_OK; } + printf("Calling dyad_dtl_finalize\n"); + dyad_dtl_finalize (&(*ctx)->dtl_handle); if ((*ctx)->h != NULL) { + printf("Calling flux_close\n"); flux_close ((*ctx)->h); (*ctx)->h = NULL; } if ((*ctx)->kvs_namespace != NULL) { + printf("Freeing kvs_namespace\n"); free ((*ctx)->kvs_namespace); (*ctx)->kvs_namespace = NULL; } if ((*ctx)->prod_managed_path != NULL) { + printf("Freeing producer path\n"); free ((*ctx)->prod_managed_path); (*ctx)->prod_managed_path = NULL; } if ((*ctx)->cons_managed_path != NULL) { + printf("Freeing consumer path\n"); free ((*ctx)->cons_managed_path); (*ctx)->cons_managed_path = NULL; } + printf ("Freeing DYAD context\n"); free (*ctx); + printf ("Setting *ctx to NULL to avoid extra freeing\n"); *ctx = NULL; + printf ("All done with dyad_finalize\n"); return DYAD_RC_OK; } diff --git a/src/core/dyad_core.h b/src/core/dyad_core.h index 21ecdbbf..b705f470 100644 --- a/src/core/dyad_core.h +++ b/src/core/dyad_core.h @@ -5,6 +5,7 @@ #include "dyad_envs.h" #include "dyad_rc.h" #include "dyad_flux_log.h" +#include "dyad_dtl_defs.h" #ifdef __cplusplus #include @@ -33,6 +34,7 @@ extern "C" { */ struct dyad_ctx { flux_t* h; // the Flux handle for DYAD + dyad_dtl_t *dtl_handle; // Opaque handle to DTL info bool debug; // if true, perform debug logging bool check; // if true, perform some check logging bool reenter; // if false, do not recursively enter DYAD @@ -99,8 +101,16 @@ dyad_rc_t 
dyad_init (bool debug, const char* kvs_namespace, const char* prod_managed_path, const char* cons_managed_path, + dyad_dtl_mode_t dtl_mode, dyad_ctx_t** ctx); +/** + * @brief Intialize the DYAD context using environment variables + * @param[out] ctx the newly initialized context + * @return An error code + */ +dyad_rc_t dyad_init_env (dyad_ctx_t** ctx); + /** * @brief Wrapper function that performs all the common tasks needed * of a producer @@ -112,8 +122,7 @@ dyad_rc_t dyad_init (bool debug, #if DYAD_PERFFLOW __attribute__ ((annotate ("@critical_path()"))) #endif -dyad_rc_t -dyad_produce (dyad_ctx_t* ctx, const char* fname); +dyad_rc_t dyad_produce (dyad_ctx_t* ctx, const char* fname); /** * @brief Wrapper function that performs all the common tasks needed @@ -126,8 +135,7 @@ dyad_produce (dyad_ctx_t* ctx, const char* fname); #if DYAD_PERFFLOW __attribute__ ((annotate ("@critical_path()"))) #endif -dyad_rc_t -dyad_consume (dyad_ctx_t* ctx, const char* fname); +dyad_rc_t dyad_consume (dyad_ctx_t* ctx, const char* fname); /** * @brief Finalizes the DYAD instance and deallocates the context diff --git a/src/core/dyad_dtl_defs.h b/src/core/dyad_dtl_defs.h new file mode 100644 index 00000000..93421925 --- /dev/null +++ b/src/core/dyad_dtl_defs.h @@ -0,0 +1,24 @@ +#ifndef __DYAD_DTL_DEFS_H__ +#define __DYAD_DTL_DEFS_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct dyad_dtl dyad_dtl_t; + +enum dyad_dtl_mode { + DYAD_DTL_FLUX_RPC = 0, + DYAD_DTL_UCX = 1, + // TODO Figure out how to use Flux RPC + // as a fallback if other DTLs fail + // DYAD_DTL_UCX_W_FALLBACK, +}; + +typedef enum dyad_dtl_mode dyad_dtl_mode_t; + +#ifdef __cplusplus +} +#endif + +#endif /* __DYAD_DTL_DEFS_H__ */ diff --git a/src/core/dyad_envs.h b/src/core/dyad_envs.h index 422b37f1..86db96c3 100644 --- a/src/core/dyad_envs.h +++ b/src/core/dyad_envs.h @@ -4,6 +4,7 @@ #define DYAD_PATH_PRODUCER_ENV "DYAD_PATH_PRODUCER" #define DYAD_PATH_CONSUMER_ENV "DYAD_PATH_CONSUMER" #define 
DYAD_KVS_NAMESPACE_ENV "DYAD_KVS_NAMESPACE" +#define DYAD_DTL_MODE_ENV "DYAD_DTL_MODE" #define DYAD_SHARED_STORAGE_ENV "DYAD_SHARED_STORAGE" #define DYAD_KEY_DEPTH_ENV "DYAD_KEY_DEPTH" #define DYAD_KEY_BINS_ENV "DYAD_KEY_BINS" diff --git a/src/core/dyad_rc.h b/src/core/dyad_rc.h index c3ed2730..a9034281 100644 --- a/src/core/dyad_rc.h +++ b/src/core/dyad_rc.h @@ -3,15 +3,25 @@ enum dyad_core_return_codes { DYAD_RC_OK = 0, // Operation worked correctly - DYAD_RC_NOCTX = -1, // No DYAD Context found - DYAD_RC_FLUXFAIL = -2, // Some Flux function failed - DYAD_RC_BADCOMMIT = -3, // Flux KVS commit didn't work - DYAD_RC_BADLOOKUP = -4, // Flux KVS lookup didn't work - DYAD_RC_BADFETCH = -5, // Flux KVS commit didn't work - DYAD_RC_BADRESPONSE = -6, // Cannot create/populate a DYAD response - DYAD_RC_BADRPC = -7, // Flux RPC pack or get didn't work - DYAD_RC_BADFIO = -8, // File I/O failed - DYAD_RC_BADMANAGEDPATH = -9, // Cons or Prod Manged Path is bad + DYAD_RC_SYSFAIL = -1, // Some sys call or C standard + // library call failed + DYAD_RC_NOCTX = -2, // No DYAD Context found + DYAD_RC_FLUXFAIL = -3, // Some Flux function failed + DYAD_RC_BADCOMMIT = -4, // Flux KVS commit didn't work + DYAD_RC_BADLOOKUP = -5, // Flux KVS lookup didn't work + DYAD_RC_BADFETCH = -6, // Flux KVS commit didn't work + DYAD_RC_BADRESPONSE = -7, // Cannot create/populate a DYAD response + DYAD_RC_BADRPC = -8, // Flux RPC pack or get didn't work + DYAD_RC_BADFIO = -9, // File I/O failed + DYAD_RC_BADMANAGEDPATH = -10, // Cons or Prod Manged Path is bad + DYAD_RC_BADDTLMODE = -11, // Invalid DYAD DTL mode provided + DYAD_RC_BADPACK = -12, // JSON RPC payload packing failed + DYAD_RC_UCXINIT_FAIL = -13, // UCX initialization failed + DYAD_RC_UCXWAIT_FAIL = -14, // UCX wait (either custom or + // 'ucp_worker_wait') failed + DYAD_RC_UCXCOMM_FAIL = -15, // UCX communication routine failed + DYAD_RC_RPC_FINISHED = -16, // The Flux RPC responded with ENODATA (i.e., + // end of stream) sooner than 
expected }; typedef enum dyad_core_return_codes dyad_rc_t; diff --git a/src/modules/Makefile-urpc b/src/modules/Makefile-urpc deleted file mode 100644 index e55b367c..00000000 --- a/src/modules/Makefile-urpc +++ /dev/null @@ -1,40 +0,0 @@ -CONFIG = ../urpc.cfg -include ${CONFIG} - -ifeq ($(URPC_DEBUG),1) - URPC_OPTIONS += -DURPC_FULL_DEBUG=1 - URPC_OPTIONS += -DURPC_LOGGING_ON=1 - $(info URPC_DEBUG is enabled) -endif - -all: urpc.so - +$(MAKE) -C test - -LIBB64_DIR = /p/lustre2/yeom2/FLUX/libb64 -_CFLAGS = $(CFLAGS) $(C_VER) $(URPC_OPTIONS) -I../common -I$(FLUX_CORE_INCLUDES) -I$(LIBB64_DIR)/include -_LDFLAGS += $(LDFLAGS) -L$(FLUX_CORE_LIBPATH) -Wl,-rpath=$(FLUX_CORE_LIBPATH),--no-undefined -shared $(FLUX_CORE_LIBS) -ldl -ljansson -L$(LIBB64_DIR)/src -lb64 -# --disable-static - -urpc.so: urpc.o ../common/utils.o read_all.o - $(CC) $(_CFLAGS) $^ -o $@ $(_LDFLAGS) - #$(CC) $(_CFLAGS) -L$(FLUX_CORE_LIBPATH) -Wl,-rpath=$(FLUX_CORE_LIBPATH),--no-undefined -shared $^ -o $@ $(FLUX_CORE_LIBS) -ldl - -urpc.o: urpc.c urpc_ctx.h - $(CC) $(_CFLAGS) $< -c -o $@ - -read_all.o: read_all.c - $(CC) $(_CFLAGS) $^ -c -o $@ - -start: urpc.so - flux module load ./urpc.so $(shell readlink -f ../wrapper/test)/prod - - -.PHONY: clean install - -install: urpc.so - install -d ${PREFIX}/lib - install -m 640 urpc.so ${PREFIX}/lib - -clean: - @rm -f *.o *.so - +$(MAKE) clean -C test diff --git a/src/modules/Makefile.am b/src/modules/Makefile.am index a57f214e..d384eefc 100644 --- a/src/modules/Makefile.am +++ b/src/modules/Makefile.am @@ -1,13 +1,33 @@ lib_LTLIBRARIES = dyad.la -dyad_la_SOURCES = dyad.c read_all.c dyad_ctx.h read_all.h +dyad_la_SOURCES = \ + dtl/dyad_mod_dtl.c \ + dtl/dyad_mod_dtl.h \ + dtl/flux_mod_dtl.c \ + dtl/flux_mod_dtl.h \ + dtl/ucx_mod_dtl.c \ + dtl/ucx_mod_dtl.h \ + dyad.c \ + dyad_flux_log.h dyad_la_LDFLAGS = \ $(AM_LDFLAGS) \ -module \ -avoid-version \ -no-undefined \ - -export-symbols-regex '^mod_(name|main)$$' -dyad_la_LIBADD = 
$(top_builddir)/src/utils/libutils.la $(FLUX_CORE_LIBS) -dyad_la_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/utils $(FLUX_CORE_CFLAGS) + -shared \ + -export-symbols-regex '^mod_(name|main)$$' \ + -Wl,-rpath,'$(UCX_LIBDIR)' +dyad_la_LIBADD = \ + $(top_builddir)/src/utils/libutils.la \ + $(UCX_LIBS) \ + $(JANSSON_LIBS) \ + $(FLUX_CORE_LIBS) +dyad_la_CPPFLAGS = \ + $(AM_CPPFLAGS) \ + -I$(top_srcdir)/src/utils \ + -I$(top_srcdir)/src/utils/base64 \ + $(UCX_CFLAGS) \ + $(JANSSON_CFLAGS) \ + $(FLUX_CORE_CFLAGS) if PERFFLOW dyad_la_LIBADD += $(PERFFLOW_LIBS) dyad_la_CPPFLAGS += $(PERFFLOW_PLUGIN_CPPFLAGS) $(PERFFLOW_CFLAGS) -DDYAD_PERFFLOW=1 @@ -15,7 +35,7 @@ endif if URPC lib_LTLIBRARIES += urpc.la -urpc_la_SOURCES = read_all.c urpc.c urpc_ctx.h +urpc_la_SOURCES = urpc.c urpc_ctx.h # TODO replace current LDFLAGS rule with this one once libb64 is required # urpc_la_LDFLAGS = -module -avoid-version -no-undefined -L$(LIBB64_DIR)/src urpc_la_LDFLAGS = -module -avoid-version -no-undefined @@ -32,4 +52,4 @@ endif endif install-exec-hook: - @(cd $(DESTDIR)$(libdir) && $(RM) dyad.la dyad.a) + @(cd $(DESTDIR)$(libdir) && $(RM) dyad.la) diff --git a/src/modules/dtl/dyad_mod_dtl.c b/src/modules/dtl/dyad_mod_dtl.c new file mode 100644 index 00000000..7fa9300c --- /dev/null +++ b/src/modules/dtl/dyad_mod_dtl.c @@ -0,0 +1,169 @@ +#include "dyad_mod_dtl.h" + +#include "ucx_mod_dtl.h" +#include "flux_mod_dtl.h" + +struct dyad_mod_dtl { + dyad_mod_dtl_mode_t mode; + flux_t *h; + void *real_handle; +}; + +int dyad_mod_dtl_init(dyad_mod_dtl_mode_t mode, + flux_t *h, bool debug, + dyad_mod_dtl_t **dtl_handle) +{ + *dtl_handle = malloc(sizeof(struct dyad_mod_dtl)); + if (*dtl_handle == NULL) + { + FLUX_LOG_ERR (h, "Could not allocate a dyad_mode_dtl_t object\n"); + return -1; + } + (*dtl_handle)->mode = mode; + (*dtl_handle)->h = h; + if (mode == DYAD_DTL_UCX) + { + FLUX_LOG_INFO (h, "Initializing UCX DTL!\n"); + return dyad_mod_ucx_dtl_init( + h, + debug, + 
(dyad_mod_ucx_dtl_t**)&(*dtl_handle)->real_handle + ); + } + if (mode == DYAD_DTL_FLUX_RPC) + { + FLUX_LOG_INFO (h, "Initializing Flux RPC DTL!\n"); + return dyad_mod_flux_dtl_init( + h, + debug, + (dyad_mod_flux_dtl_t**)&(*dtl_handle)->real_handle + ); + } + FLUX_LOG_ERR (h, "Invalid DYAD DTL mode: %d\n", (int) mode); + return -1; +} + +int dyad_mod_dtl_rpc_unpack(dyad_mod_dtl_t *dtl_handle, + const flux_msg_t *packed_obj, char **upath) +{ + if (dtl_handle->mode == DYAD_DTL_UCX) + { + return dyad_mod_ucx_dtl_rpc_unpack( + (dyad_mod_ucx_dtl_t*)dtl_handle->real_handle, + packed_obj, + upath + ); + } + if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) + { + return dyad_mod_flux_dtl_rpc_unpack( + (dyad_mod_flux_dtl_t*)dtl_handle->real_handle, + packed_obj, + upath + ); + } + FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL mode: %d\n", (int) dtl_handle->mode); + return -1; +} + +int dyad_mod_dtl_rpc_respond(dyad_mod_dtl_t *dtl_handle, const flux_msg_t *orig_msg) +{ + if (dtl_handle->mode == DYAD_DTL_UCX) + { + return dyad_mod_ucx_dtl_rpc_respond( + (dyad_mod_ucx_dtl_t*)dtl_handle->real_handle, + orig_msg + ); + } + if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) + { + return dyad_mod_flux_dtl_rpc_respond( + (dyad_mod_flux_dtl_t*)dtl_handle->real_handle, + orig_msg + ); + } + FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL mode: %d\n", (int) dtl_handle->mode); + return -1; +} + +int dyad_mod_dtl_establish_connection(dyad_mod_dtl_t *dtl_handle) +{ + if (dtl_handle->mode == DYAD_DTL_UCX) + { + return dyad_mod_ucx_dtl_establish_connection( + (dyad_mod_ucx_dtl_t*)dtl_handle->real_handle + ); + } + if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) + { + return dyad_mod_flux_dtl_establish_connection( + (dyad_mod_flux_dtl_t*)dtl_handle->real_handle + ); + } + FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL mode: %d\n", (int) dtl_handle->mode); + return -1; +} + +int dyad_mod_dtl_send(dyad_mod_dtl_t *dtl_handle, void *buf, size_t buflen) +{ + if (dtl_handle->mode == DYAD_DTL_UCX) + { + return 
dyad_mod_ucx_dtl_send( + (dyad_mod_ucx_dtl_t*)dtl_handle->real_handle, + buf, + buflen + ); + } + if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) + { + return dyad_mod_flux_dtl_send( + (dyad_mod_flux_dtl_t*)dtl_handle->real_handle, + buf, + buflen + ); + } + FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL mode: %d\n", (int) dtl_handle->mode); + return -1; +} + +int dyad_mod_dtl_close_connection(dyad_mod_dtl_t *dtl_handle) +{ + if (dtl_handle->mode == DYAD_DTL_UCX) + { + return dyad_mod_ucx_dtl_close_connection( + (dyad_mod_ucx_dtl_t*)dtl_handle->real_handle + ); + } + if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) + { + return dyad_mod_flux_dtl_close_connection( + (dyad_mod_flux_dtl_t*)dtl_handle->real_handle + ); + } + FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL mode: %d\n", (int) dtl_handle->mode); + return -1; +} + +int dyad_mod_dtl_finalize(dyad_mod_dtl_t **dtl_handle) +{ + if (dtl_handle == NULL || *dtl_handle == NULL) + return 0; + dyad_mod_dtl_mode_t mode = (*dtl_handle)->mode;; + flux_t *h = (*dtl_handle)->h; + void* real_handle = (*dtl_handle)->real_handle; + free(*dtl_handle); + if (mode == DYAD_DTL_UCX) + { + return dyad_mod_ucx_dtl_finalize( + (dyad_mod_ucx_dtl_t*)real_handle + ); + } + if (mode == DYAD_DTL_FLUX_RPC) + { + return dyad_mod_flux_dtl_finalize( + (dyad_mod_flux_dtl_t*)real_handle + ); + } + FLUX_LOG_ERR (h, "Invalid DYAD DTL mode: %d\n", (int) mode); + return -1; +} diff --git a/src/modules/dtl/dyad_mod_dtl.h b/src/modules/dtl/dyad_mod_dtl.h new file mode 100644 index 00000000..3e68a785 --- /dev/null +++ b/src/modules/dtl/dyad_mod_dtl.h @@ -0,0 +1,42 @@ +#ifndef __DYAD_MOD_DTL_H__ +#define __DYAD_MOD_DTL_H__ + +#include + +#if defined(__cplusplus) +#include +#else +#include +#include +#endif /* defined(__cplusplus) */ + +typedef struct dyad_mod_dtl dyad_mod_dtl_t; + +enum dyad_mod_dtl_mode { + DYAD_DTL_FLUX_RPC, + DYAD_DTL_UCX, + // TODO Figure out how to use Flux RPC + // as a fallback for if UCX fails + // DYAD_DTL_UCX_W_FALLBACK, +}; + +typedef 
enum dyad_mod_dtl_mode dyad_mod_dtl_mode_t; + +int dyad_mod_dtl_init(dyad_mod_dtl_mode_t mode, flux_t *h, + bool debug, dyad_mod_dtl_t **dtl_handle); + +int dyad_mod_dtl_rpc_unpack(dyad_mod_dtl_t *dtl_handle, + const flux_msg_t *packed_obj, char **upath); + +int dyad_mod_dtl_rpc_respond(dyad_mod_dtl_t *dtl_handle, + const flux_msg_t *orig_msg); + +int dyad_mod_dtl_establish_connection(dyad_mod_dtl_t *dtl_handle); + +int dyad_mod_dtl_send(dyad_mod_dtl_t *dtl_handle, void *buf, size_t buflen); + +int dyad_mod_dtl_close_connection(dyad_mod_dtl_t *dtl_handle); + +int dyad_mod_dtl_finalize(dyad_mod_dtl_t **dtl_handle); + +#endif /* __DYAD_MOD_DTL_H__ */ diff --git a/src/modules/dtl/flux_mod_dtl.c b/src/modules/dtl/flux_mod_dtl.c new file mode 100644 index 00000000..b8222374 --- /dev/null +++ b/src/modules/dtl/flux_mod_dtl.c @@ -0,0 +1,82 @@ +#include "flux_mod_dtl.h" + +int dyad_mod_flux_dtl_init(flux_t *h, bool debug, + dyad_mod_flux_dtl_t **dtl_handle) +{ + *dtl_handle = malloc(sizeof(struct dyad_mod_flux_dtl)); + if (*dtl_handle == NULL) + { + FLUX_LOG_ERR(h, "Cannot allocate a context for the Flux RPC DTL\n"); + return -1; + } + (*dtl_handle)->h = h; + (*dtl_handle)->debug = debug; + (*dtl_handle)->msg = NULL; + return 0; +} + +int dyad_mod_flux_dtl_rpc_unpack(dyad_mod_flux_dtl_t *dtl_handle, + const flux_msg_t *packed_obj, char **upath) +{ + int errcode = flux_request_unpack( + packed_obj, + NULL, + "{s:s}", + "upath", + upath + ); + if (errcode < 0) + { + FLUX_LOG_ERR(dtl_handle->h, "Could not unpack Flux message from consumer!\n"); + return -1; + } + // Save the flux_msg_t object here instead of dyad_mod_flux_dtl_rpc_respond + // to increase the odds that the compiler will optimize rpc_respond away + dtl_handle->msg = packed_obj; + return 0; +} + +int dyad_mod_flux_dtl_rpc_respond (dyad_mod_flux_dtl_t *dtl_handle, + const flux_msg_t *orig_msg) +{ + return 0; +} + +int dyad_mod_flux_dtl_establish_connection(dyad_mod_flux_dtl_t *dtl_handle) +{ + return 0; +} + +int 
dyad_mod_flux_dtl_send(dyad_mod_flux_dtl_t *dtl_handle, void *buf, size_t buflen) +{ + FLUX_LOG_INFO (dtl_handle->h, "Send data to consumer using a Flux RPC response"); + int errcode = flux_respond_raw(dtl_handle->h, dtl_handle->msg, buf, (int)buflen); + if (errcode < 0) + { + FLUX_LOG_ERR(dtl_handle->h, "Could not send Flux RPC response containing file contents!\n"); + return -1; + } + if (dtl_handle->debug) + { + FLUX_LOG_INFO(dtl_handle->h, "Successfully sent file contents to consumer!\n"); + } + return 0; +} + +int dyad_mod_flux_dtl_close_connection(dyad_mod_flux_dtl_t *dtl_handle) +{ + if (dtl_handle != NULL) + { + dtl_handle->msg = NULL; + } + return 0; +} + +int dyad_mod_flux_dtl_finalize(dyad_mod_flux_dtl_t *dtl_handle) +{ + if (dtl_handle != NULL) + { + free(dtl_handle); + } + return 0; +} diff --git a/src/modules/dtl/flux_mod_dtl.h b/src/modules/dtl/flux_mod_dtl.h new file mode 100644 index 00000000..2a3ecdf5 --- /dev/null +++ b/src/modules/dtl/flux_mod_dtl.h @@ -0,0 +1,38 @@ +#ifndef __DYAD_MOD_FLUX_DTL_H__ +#define __DYAD_MOD_FLUX_DTL_H__ + +#include "dyad_flux_log.h" + +#ifdef __cplusplus +#include +#else +#include +#include +#endif + +struct dyad_mod_flux_dtl { + flux_t *h; + bool debug; + const flux_msg_t *msg; +}; + +typedef struct dyad_mod_flux_dtl dyad_mod_flux_dtl_t; + +int dyad_mod_flux_dtl_init(flux_t *h, bool debug, + dyad_mod_flux_dtl_t **dtl_handle); + +int dyad_mod_flux_dtl_rpc_unpack(dyad_mod_flux_dtl_t *dtl_handle, + const flux_msg_t *packed_obj, char **upath); + +int dyad_mod_flux_dtl_rpc_respond (dyad_mod_flux_dtl_t *dtl_handle, + const flux_msg_t *orig_msg); + +int dyad_mod_flux_dtl_establish_connection(dyad_mod_flux_dtl_t *dtl_handle); + +int dyad_mod_flux_dtl_send(dyad_mod_flux_dtl_t *dtl_handle, void *buf, size_t buflen); + +int dyad_mod_flux_dtl_close_connection(dyad_mod_flux_dtl_t *dtl_handle); + +int dyad_mod_flux_dtl_finalize(dyad_mod_flux_dtl_t *dtl_handle); + +#endif /* __DYAD_MOD_FLUX_DTL_H__ */ diff --git 
a/src/modules/dtl/ucx_mod_dtl.c b/src/modules/dtl/ucx_mod_dtl.c new file mode 100644 index 00000000..f9f87d78 --- /dev/null +++ b/src/modules/dtl/ucx_mod_dtl.c @@ -0,0 +1,347 @@ +#include "ucx_mod_dtl.h" + +#include "base64.h" + +// Get base64_maps_rfc4648 from flux-core +extern const base64_maps_t base64_maps_rfc4648; + +#define UCX_CHECK(status_code) status_code != UCS_OK + +#if !defined(UCP_API_VERSION) +#error Due to UCP API changes, we must be able to determine the version of UCP! \ + Please use a version of UCX with the UCP_API_VERSION macro defined! +#endif + +struct mod_request { + int completed; +}; +typedef struct mod_request mod_request_t; + +static void dyad_mod_ucx_request_init(void *request) +{ + mod_request_t *real_request = (mod_request_t*)request; + real_request->completed = 0; +} + +#if UCP_API_VERSION >= UCP_VERSION(1, 10) +static void dyad_ucx_send_handler(void *req, ucs_status_t status, void *ctx) +#else +static void dyad_ucx_send_handler(void *req, ucs_status_t status) +#endif +{ + mod_request_t *real_req = (mod_request_t*)req; + real_req->completed = 1; +} + +void dyad_mod_ucx_ep_err_handler (void *arg, ucp_ep_h ep, ucs_status_t status) { + flux_t *h = (flux_t*)arg; + FLUX_LOG_ERR (h, "An error occured on the UCP endpoint (status = %d)\n", status); +} + +int dyad_mod_ucx_dtl_init(flux_t *h, bool debug, dyad_mod_ucx_dtl_t **dtl_handle) +{ + ucp_params_t ucp_params; + ucp_worker_params_t worker_params; + ucp_config_t *config = NULL; + ucs_status_t status = UCS_OK; + *dtl_handle = malloc(sizeof(struct dyad_mod_ucx_dtl)); + (*dtl_handle)->h = h; + (*dtl_handle)->debug = debug; + (*dtl_handle)->ucx_ctx = NULL; + (*dtl_handle)->ucx_worker = NULL; + (*dtl_handle)->curr_ep = NULL; + (*dtl_handle)->curr_cons_addr = NULL; + (*dtl_handle)->addr_len = 0; + (*dtl_handle)->curr_comm_tag = 0; + FLUX_LOG_INFO (h, "Reading UCP config for DTL\n"); + status = ucp_config_read(NULL, NULL, &config); + if (UCX_CHECK(status)) + { + FLUX_LOG_ERR(h, "Could not read 
UCP config for data transport!\n"); + goto ucx_init_error; + } + ucp_params.field_mask = UCP_PARAM_FIELD_FEATURES | + UCP_PARAM_FIELD_REQUEST_SIZE | + UCP_PARAM_FIELD_REQUEST_INIT; + ucp_params.features = UCP_FEATURE_TAG | + UCP_FEATURE_RMA | + UCP_FEATURE_WAKEUP; + ucp_params.request_size = sizeof(struct mod_request); + ucp_params.request_init = dyad_mod_ucx_request_init; + FLUX_LOG_INFO (h, "Initializing UCX\n"); + status = ucp_init(&ucp_params, config, &((*dtl_handle)->ucx_ctx)); + if (debug) + { + ucp_config_print( + config, + stderr, + "UCX Configuration", + UCS_CONFIG_PRINT_CONFIG + ); + } + ucp_config_release(config); + if (UCX_CHECK(status)) + { + FLUX_LOG_ERR(h, "Could not initialize UCX for data transport!\n"); + goto ucx_init_error; + } + // Flux modules are single-threaded, so enable single-thread mode in UCX + worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE | + UCP_WORKER_PARAM_FIELD_EVENTS; + worker_params.thread_mode = UCS_THREAD_MODE_SINGLE; + worker_params.events = UCP_WAKEUP_TAG_SEND; + FLUX_LOG_INFO (h, "Creating UCP worker\n"); + status = ucp_worker_create( + (*dtl_handle)->ucx_ctx, + &worker_params, + &(*dtl_handle)->ucx_worker + ); + if (UCX_CHECK(status)) + { + FLUX_LOG_ERR(h, "Could not create UCP worker for data transport!\n"); + goto ucx_init_error; + } + FLUX_LOG_INFO (h, "UCX initialization successful\n"); + return 0; + +ucx_init_error:; + dyad_mod_ucx_dtl_finalize(*dtl_handle); + return -1; +} + +int dyad_mod_ucx_dtl_rpc_unpack(dyad_mod_ucx_dtl_t *dtl_handle, + const flux_msg_t *packed_obj, char **upath) +{ + char* enc_addr = NULL; + size_t enc_addr_len = 0; + int errcode = 0; + uint32_t tag_prod = 0; + uint32_t tag_cons = 0; + ssize_t decoded_len = 0; + FLUX_LOG_INFO (dtl_handle->h, "Unpacking RPC payload\n"); + errcode = flux_request_unpack(packed_obj, + NULL, + "{s:s, s:i, s:i, s:s%}", + "upath", + upath, + "tag_prod", + &tag_prod, + "tag_cons", + &tag_cons, + "ucx_addr", + &enc_addr, + &enc_addr_len + ); + if (errcode 
< 0) + { + FLUX_LOG_ERR(dtl_handle->h, "Could not unpack Flux message from consumer!\n"); + return -1; + } + dtl_handle->curr_comm_tag = ((uint64_t)tag_prod << 32) | (uint64_t)tag_cons; + FLUX_LOG_INFO (dtl_handle->h, "Obtained upath from RPC payload: %s\n", *upath); + FLUX_LOG_INFO (dtl_handle->h, "Obtained UCP tag from RPC payload: %lu\n", dtl_handle->curr_comm_tag); + FLUX_LOG_INFO (dtl_handle->h, "Decoding consumer UCP address using base64\n"); + dtl_handle->addr_len = base64_decoded_length(enc_addr_len); + dtl_handle->curr_cons_addr = (ucp_address_t*) malloc(dtl_handle->addr_len); + decoded_len = base64_decode_using_maps (&base64_maps_rfc4648, + (char*)dtl_handle->curr_cons_addr, dtl_handle->addr_len, + enc_addr, enc_addr_len); + if (decoded_len < 0) + { + FLUX_LOG_ERR (dtl_handle->h, "Could not decode the consumer's UCP address from the RPC payload!\n"); + free(dtl_handle->curr_cons_addr); + dtl_handle->curr_cons_addr = NULL; + dtl_handle->addr_len = 0; + return -1; + } + return 0; +} + +int dyad_mod_ucx_dtl_rpc_respond (dyad_mod_ucx_dtl_t *dtl_handle, + const flux_msg_t *orig_msg) +{ + return 0; +} + +int dyad_mod_ucx_dtl_establish_connection(dyad_mod_ucx_dtl_t *dtl_handle) +{ + ucp_ep_params_t params; + ucs_status_t status = UCS_OK; + params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS | + UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE | + UCP_EP_PARAM_FIELD_ERR_HANDLER; + params.address = dtl_handle->curr_cons_addr; + params.err_mode = UCP_ERR_HANDLING_MODE_PEER; + params.err_handler.cb = dyad_mod_ucx_ep_err_handler; + params.err_handler.arg = (void*) dtl_handle->h; + FLUX_LOG_INFO (dtl_handle->h, "Create UCP endpoint for communication with consumer\n"); + status = ucp_ep_create( + dtl_handle->ucx_worker, + &params, + &dtl_handle->curr_ep + ); + if (UCX_CHECK(status)) + { + return -1; + } + if (dtl_handle->debug) + { + ucp_ep_print_info(dtl_handle->curr_ep, stderr); + } + return 0; +} + +int dyad_mod_ucx_dtl_send(dyad_mod_ucx_dtl_t *dtl_handle, void *buf, size_t buflen) +{ + ucs_status_ptr_t stat_ptr; + ucs_status_t status = UCS_OK; + mod_request_t 
*req = NULL; + if (dtl_handle->curr_ep == NULL) + { + FLUX_LOG_INFO(dtl_handle->h, "UCP endpoint was not created prior to invoking send!\n"); + return -1; + } + // Use ucp_tag_send_nbx when the UCP API version is 1.10 or newer. + // However, some systems (e.g., Lassen) may have an older version, + // so this conditional compilation falls back to ucp_tag_send_nb + // when the UCP API version is older than 1.10. +#if UCP_API_VERSION >= UCP_VERSION(1, 10) + ucp_request_param_t params; + params.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK; + params.cb.send = dyad_ucx_send_handler; + FLUX_LOG_INFO (dtl_handle->h, "Sending data to consumer with ucp_tag_send_nbx\n"); + stat_ptr = ucp_tag_send_nbx( + dtl_handle->curr_ep, + buf, + buflen, + dtl_handle->curr_comm_tag, + &params + ); +#else + FLUX_LOG_INFO (dtl_handle->h, "Sending data to consumer with ucp_tag_send_nb\n"); + stat_ptr = ucp_tag_send_nb( + dtl_handle->curr_ep, + buf, + buflen, + UCP_DATATYPE_CONTIG, + dtl_handle->curr_comm_tag, + dyad_ucx_send_handler + ); +#endif + FLUX_LOG_INFO (dtl_handle->h, "Processing UCP send request\n"); + if (UCS_PTR_IS_ERR(stat_ptr)) + { + FLUX_LOG_INFO (dtl_handle->h, "Error occured in UCP send\n"); + status = UCS_PTR_STATUS(stat_ptr); + } + else if (UCS_PTR_IS_PTR(stat_ptr)) + { + FLUX_LOG_INFO (dtl_handle->h, "Waiting for send to complete\n"); + req = (mod_request_t*)stat_ptr; + while (!req->completed) + { + ucp_worker_progress(dtl_handle->ucx_worker); + } + req->completed = 0; + status = ucp_request_check_status(req); + ucp_request_free(req); + } + else + { + FLUX_LOG_INFO (dtl_handle->h, "UCP send completed immediately\n"); + status = UCS_OK; + } + if (UCX_CHECK(status)) + { + FLUX_LOG_ERR (dtl_handle->h, "UCP Tag Send failed (status = %d)!\n", (int)status); + return -1; + } + FLUX_LOG_INFO (dtl_handle->h, "Data send with UCP succeeded\n"); + return 0; +} + +int dyad_mod_ucx_dtl_close_connection(dyad_mod_ucx_dtl_t *dtl_handle) +{ + 
ucs_status_t status = UCS_OK; + ucs_status_ptr_t stat_ptr; + if (dtl_handle != NULL) + { + if (dtl_handle->curr_ep != NULL) + { + // Use ucp_ep_close_nbx when the UCP API version is 1.10 or newer. + // However, some systems (e.g., Lassen) may have an older version, + // so this conditional compilation falls back to ucp_ep_close_nb + // when the UCP API version is older than 1.10. + FLUX_LOG_INFO (dtl_handle->h, "Start async closing of UCP endpoint\n"); +#if UCP_API_VERSION >= UCP_VERSION(1, 10) + ucp_request_param_t close_params; + close_params.op_attr_mask = UCP_OP_ATTR_FIELD_FLAGS; + close_params.flags = UCP_EP_CLOSE_FLAG_FORCE; + stat_ptr = ucp_ep_close_nbx(dtl_handle->curr_ep, &close_params); +#else + // Force close, matching the UCP_EP_CLOSE_FLAG_FORCE flag used with ucp_ep_close_nbx + stat_ptr = ucp_ep_close_nb(dtl_handle->curr_ep, UCP_EP_CLOSE_MODE_FORCE); +#endif + FLUX_LOG_INFO (dtl_handle->h, "Wait for endpoint closing to finish\n"); + if (stat_ptr != NULL) + { + // Endpoint close is in-progress. + // Wait until finished + if (UCS_PTR_IS_PTR(stat_ptr)) + { + do { + ucp_worker_progress(dtl_handle->ucx_worker); + status = ucp_request_check_status(stat_ptr); + } while (status == UCS_INPROGRESS); + ucp_request_free(stat_ptr); + } + // An error occurred during endpoint closure + // However, the endpoint can no longer be used + // Get the status code for reporting + else + { + status = UCS_PTR_STATUS(stat_ptr); + } + if (UCX_CHECK(status)) + { + FLUX_LOG_ERR(dtl_handle->h, "Could not successfully close Endpoint (status = %d)! 
However, endpoint was released.\n", status); + } + } + dtl_handle->curr_ep = NULL; + } + if (dtl_handle->curr_cons_addr != NULL) + { + free(dtl_handle->curr_cons_addr); + dtl_handle->curr_cons_addr = NULL; + dtl_handle->addr_len = 0; + } + dtl_handle->curr_comm_tag = 0; + FLUX_LOG_INFO (dtl_handle->h, "UCP endpoint close successful\n"); + } + return 0; +} + +int dyad_mod_ucx_dtl_finalize(dyad_mod_ucx_dtl_t *dtl_handle) +{ + if (dtl_handle != NULL) + { + if (dtl_handle->curr_cons_addr != NULL || dtl_handle->curr_ep != NULL) + { + dyad_mod_ucx_dtl_close_connection(dtl_handle); + } + if (dtl_handle->ucx_worker != NULL) + { + ucp_worker_destroy(dtl_handle->ucx_worker); + dtl_handle->ucx_worker = NULL; + } + if (dtl_handle->ucx_ctx != NULL) + { + ucp_cleanup(dtl_handle->ucx_ctx); + dtl_handle->ucx_ctx = NULL; + } + dtl_handle->h = NULL; + free(dtl_handle); + dtl_handle = NULL; + } + return 0; +} diff --git a/src/modules/dtl/ucx_mod_dtl.h b/src/modules/dtl/ucx_mod_dtl.h new file mode 100644 index 00000000..f495ba04 --- /dev/null +++ b/src/modules/dtl/ucx_mod_dtl.h @@ -0,0 +1,43 @@ +#ifndef __DYAD_MOD_UCX_DTL_H__ +#define __DYAD_MOD_UCX_DTL_H__ + +#include "dyad_flux_log.h" +#include + +#if defined(__cplusplus) +#include +#else +#include +#include +#endif + +struct dyad_mod_ucx_dtl { + flux_t *h; + bool debug; + ucp_context_h ucx_ctx; + ucp_worker_h ucx_worker; + ucp_ep_h curr_ep; + ucp_address_t *curr_cons_addr; + size_t addr_len; + ucp_tag_t curr_comm_tag; +}; + +typedef struct dyad_mod_ucx_dtl dyad_mod_ucx_dtl_t; + +int dyad_mod_ucx_dtl_init(flux_t *h, bool debug, dyad_mod_ucx_dtl_t **dtl_handle); + +int dyad_mod_ucx_dtl_rpc_unpack(dyad_mod_ucx_dtl_t *dtl_handle, + const flux_msg_t *packed_obj, char **upath); + +int dyad_mod_ucx_dtl_rpc_respond (dyad_mod_ucx_dtl_t *dtl_handle, + const flux_msg_t *orig_msg); + +int dyad_mod_ucx_dtl_establish_connection(dyad_mod_ucx_dtl_t *dtl_handle); + +int dyad_mod_ucx_dtl_send(dyad_mod_ucx_dtl_t *dtl_handle, void *buf, size_t 
buflen); + +int dyad_mod_ucx_dtl_close_connection(dyad_mod_ucx_dtl_t *dtl_handle); + +int dyad_mod_ucx_dtl_finalize(dyad_mod_ucx_dtl_t *dtl_handle); + +#endif /* __DYAD_MOD_UCX_DTL_H__ */ diff --git a/src/modules/dyad.c b/src/modules/dyad.c index 036f077b..c8d243c5 100644 --- a/src/modules/dyad.c +++ b/src/modules/dyad.c @@ -22,15 +22,15 @@ #include #include #include +#include #endif // defined(__cplusplus) #include -#include #include #include #include -#include "dyad_ctx.h" +#include "dtl/dyad_mod_dtl.h" #include "read_all.h" #include "utils.h" @@ -39,17 +39,23 @@ - (Tstart).tv_nsec) \ / 1000000000L) -#if !defined(DYAD_LOGGING_ON) || (DYAD_LOGGING_ON == 0) -#define FLUX_LOG_INFO(...) \ - do { \ - } while (0) -#define FLUX_LOG_ERR(...) \ - do { \ - } while (0) -#else -#define FLUX_LOG_INFO(h, ...) flux_log (h, LOG_INFO, __VA_ARGS__) -#define FLUX_LOG_ERR(h, ...) flux_log_error (h, __VA_ARGS__) -#endif +struct dyad_mod_ctx { + flux_t *h; + bool debug; + flux_msg_handler_t **handlers; + const char *dyad_path; + dyad_mod_dtl_t *dtl_handle; +}; + +const struct dyad_mod_ctx dyad_mod_ctx_default = { + NULL, + false, + NULL, + NULL, + NULL +}; + +typedef struct dyad_mod_ctx dyad_mod_ctx_t; static void dyad_mod_fini (void) __attribute__ ((destructor)); @@ -65,6 +71,10 @@ static void freectx (void *arg) { dyad_mod_ctx_t *ctx = (dyad_mod_ctx_t *)arg; flux_msg_handler_delvec (ctx->handlers); + if (ctx->dtl_handle != NULL) { + dyad_mod_dtl_finalize (&(ctx->dtl_handle)); + ctx->dtl_handle = NULL; + } free (ctx); } @@ -73,51 +83,37 @@ static dyad_mod_ctx_t *getctx (flux_t *h) dyad_mod_ctx_t *ctx = (dyad_mod_ctx_t *)flux_aux_get (h, "dyad"); if (!ctx) { - ctx = (dyad_mod_ctx_t *)malloc (sizeof (*ctx)); + ctx = (dyad_mod_ctx_t *) malloc (sizeof (*ctx)); ctx->h = h; ctx->debug = false; ctx->handlers = NULL; ctx->dyad_path = NULL; + ctx->dtl_handle = NULL; if (flux_aux_set (h, "dyad", ctx, freectx) < 0) { FLUX_LOG_ERR (h, "DYAD_MOD: flux_aux_set() failed!\n"); - goto error; + goto 
getctx_error; } } - goto done; + goto getctx_done; -error:; +getctx_error:; return NULL; -done: +getctx_done: return ctx; } -#if DYAD_PERFFLOW -__attribute__ ((annotate ("@critical_path()"))) void dyad_respond ( - flux_t *h, - const flux_msg_t *msg, - const void *inbuf, - size_t inlen) -{ - if (flux_respond_raw (h, msg, inbuf, inlen) < 0) { - FLUX_LOG_ERR (h, "DYAD_MOD: %s: flux_respond", __FUNCTION__); - } else { - FLUX_LOG_INFO (h, "DYAD_MOD: dyad_fetch_request_cb() served\n"); - } -} -#endif // DYAD_PERFFLOW - /* request callback called when dyad.fetch request is invoked */ #if DYAD_PERFFLOW __attribute__ ((annotate ("@critical_path()"))) #endif -static void -dyad_fetch_request_cb (flux_t *h, - flux_msg_handler_t *w, - const flux_msg_t *msg, - void *arg) +static void dyad_fetch_request_cb (flux_t *h, + flux_msg_handler_t *w, + const flux_msg_t *msg, + void *arg) { + FLUX_LOG_INFO (h, "Launched callback for dyad.fetch\n"); dyad_mod_ctx_t *ctx = getctx (h); ssize_t inlen = 0; void *inbuf = NULL; @@ -126,15 +122,31 @@ dyad_fetch_request_cb (flux_t *h, char *upath = NULL; char fullpath[PATH_MAX + 1] = {'\0'}; int saved_errno = errno; - errno = 0; + int rc = 0; + + if (!flux_msg_is_streaming (msg)) { + errno = EPROTO; + goto fetch_error; + } if (flux_msg_get_userid (msg, &userid) < 0) - goto error; + goto fetch_error; - if (flux_request_unpack (msg, NULL, "{s:s}", "upath", &upath) < 0) - goto error; + FLUX_LOG_INFO (h, "DYAD_MOD: unpacking RPC message"); + + if (dyad_mod_dtl_rpc_unpack (ctx->dtl_handle, msg, &upath) < 0) { + FLUX_LOG_ERR (ctx->h, "Could not unpack message from client\n"); + errno = EPROTO; + goto fetch_error; + } FLUX_LOG_INFO (h, "DYAD_MOD: requested user_path: %s", upath); + FLUX_LOG_INFO (h, "DYAD_MOD: sending initial response to consumer"); + + if (dyad_mod_dtl_rpc_respond (ctx->dtl_handle, msg) < 0) { + FLUX_LOG_ERR (ctx->h, "Could not send primary RPC response to client\n"); + goto fetch_error; + } strncpy (fullpath, ctx->dyad_path, PATH_MAX - 
1); concat_str (fullpath, upath, "/", PATH_MAX); @@ -147,52 +159,65 @@ dyad_fetch_request_cb (flux_t *h, } #endif // DYAD_SPIN_WAIT + FLUX_LOG_INFO (h, "Reading file %s for transfer", fullpath); fd = open (fullpath, O_RDONLY); if (fd < 0) { FLUX_LOG_ERR (h, "DYAD_MOD: Failed to open file \"%s\".\n", fullpath); - goto error; + goto fetch_error; } if ((inlen = read_all (fd, &inbuf)) < 0) { FLUX_LOG_ERR (h, "DYAD_MOD: Failed to load file \"%s\".\n", fullpath); - goto error; + goto fetch_error; } close (fd); - goto done; + FLUX_LOG_INFO (h, "Establish DTL connection with consumer"); + if (dyad_mod_dtl_establish_connection (ctx->dtl_handle) < 0) { + FLUX_LOG_ERR (ctx->h, "Could not establish DTL connection with client\n"); + errno = ECONNREFUSED; + goto fetch_error; + } + FLUX_LOG_INFO (h, "Send file to consumer with DTL"); + rc = dyad_mod_dtl_send (ctx->dtl_handle, inbuf, inlen); + FLUX_LOG_INFO (h, "Close DTL connection with consumer"); + dyad_mod_dtl_close_connection (ctx->dtl_handle); + free(inbuf); + if (rc < 0) { + FLUX_LOG_ERR (ctx->h, "Could not send data to client via DTL\n"); + errno = ECOMM; + goto fetch_error; + } -error: - if (flux_respond_error (h, msg, errno, NULL) < 0) { - FLUX_LOG_ERR (h, "DYAD_MOD: %s: flux_respond_error", __FUNCTION__); + FLUX_LOG_INFO (h, "Close RPC message stream with an ENODATA message"); + if (flux_respond_error (h, msg, ENODATA, NULL) < 0) { + FLUX_LOG_ERR (h, "DYAD_MOD: %s: flux_respond_error with ENODATA failed\n", __FUNCTION__); } + errno = saved_errno; + FLUX_LOG_INFO (h, "Finished dyad.fetch module invocation\n"); return; -done: -#if DYAD_PERFFLOW - dyad_respond (h, msg, inbuf, inlen); -#else - // if (flux_respond_raw (h, msg, errno, inbuf, inlen) < 0) - if (flux_respond_raw (h, msg, inbuf, inlen) < 0) { - FLUX_LOG_ERR (h, "DYAD_MOD: %s: flux_respond", __FUNCTION__); - } else { - FLUX_LOG_INFO (h, "DYAD_MOD: dyad_fetch_request_cb() served %s\n", - fullpath); +fetch_error: + FLUX_LOG_INFO (h, "Close RPC message stream with an 
error (errno = %d)\n", errno); + if (flux_respond_error (h, msg, errno, NULL) < 0) { + FLUX_LOG_ERR (h, "DYAD_MOD: %s: flux_respond_error", __FUNCTION__); } - // TODO: check if flux_respond_raw deallocates inbuf. - // If not, deallocate it here -#endif // DYAD_PERFFLOW errno = saved_errno; return; } -static int dyad_open (flux_t *h) +static int dyad_open (flux_t *h, dyad_mod_dtl_mode_t dtl_mode, bool debug) { dyad_mod_ctx_t *ctx = getctx (h); - int rc = -1; + int rc = 0; char *e = NULL; - if ((e = getenv ("DYAD_MOD_DEBUG")) && atoi (e)) - ctx->debug = true; - rc = 0; + ctx->debug = debug; + rc = dyad_mod_dtl_init ( + dtl_mode, + h, + ctx->debug, + &(ctx->dtl_handle) + ); return rc; } @@ -202,32 +227,68 @@ static const struct flux_msg_handler_spec htab[] = {{FLUX_MSGTYPE_REQUEST, dyad_fetch_request_cb, 0}, FLUX_MSGHANDLER_TABLE_END}; +void usage() +{ + fprintf(stderr, "Usage: flux exec -r all flux module load dyad.so \n\n"); + fprintf(stderr, "DTL_MODE Values:\n"); + fprintf(stderr, "================\n"); + fprintf(stderr, " * FLUX_RPC: use Flux's RPC response mechanism to send data to consumer\n"); + fprintf(stderr, " * UCX: use UCX to send data to consumer\n"); +} + int mod_main (flux_t *h, int argc, char **argv) { const mode_t m = (S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH | S_ISGID); dyad_mod_ctx_t *ctx = NULL; + size_t flag_len = 0; + dyad_mod_dtl_mode_t dtl_mode = DYAD_DTL_FLUX_RPC; + bool debug = false; if (!h) { fprintf (stderr, "Failed to get flux handle\n"); - goto done; + goto mod_done; } ctx = getctx (h); - if (argc != 1) { + FLUX_LOG_INFO (h, "Received %d cmd line args\n", argc); + + if (argc < 1) { FLUX_LOG_ERR (ctx->h, - "DYAD_MOD: Missing argument. " - "Requires a local dyad path specified.\n"); - fprintf (stderr, - "Missing argument. Requires a local dyad path specified.\n"); - goto error; + "DYAD_MOD: Missing argument(s). 
" + "Requires a local dyad path.\n"); + usage(); + goto mod_error; } (ctx->dyad_path) = argv[0]; mkdir_as_needed (ctx->dyad_path, m); - if (dyad_open (h) < 0) { + if (argc >= 2) { + FLUX_LOG_INFO (h, "DTL Mode (from cmd line) is %s\n", argv[1]); + flag_len = strlen(argv[1]); + if (strncmp (argv[1], "FLUX_RPC", flag_len) == 0) { + dtl_mode = DYAD_DTL_FLUX_RPC; + } else if (strncmp (argv[1], "UCX", flag_len) == 0) { + dtl_mode = DYAD_DTL_UCX; + } else { + FLUX_LOG_ERR (ctx->h, "Invalid DTL mode provided\n"); + usage(); + goto mod_error; + } + } + + if (argc >= 3) { + flag_len = strlen (argv[2]); + if (strncmp (argv[2], "--debug", flag_len) == 0 || strncmp (argv[2], "-d", flag_len) == 0) { + debug = true; + } else { + debug = false; + } + } + + if (dyad_open (h, dtl_mode, debug) < 0) { FLUX_LOG_ERR (ctx->h, "dyad_open failed"); - goto error; + goto mod_error; } fprintf (stderr, "dyad module begins using \"%s\"\n", argv[0]); @@ -236,20 +297,20 @@ int mod_main (flux_t *h, int argc, char **argv) if (flux_msg_handler_addvec (ctx->h, htab, (void *)h, &ctx->handlers) < 0) { FLUX_LOG_ERR (ctx->h, "flux_msg_handler_addvec: %s\n", strerror (errno)); - goto error; + goto mod_error; } if (flux_reactor_run (flux_get_reactor (ctx->h), 0) < 0) { FLUX_LOG_ERR (ctx->h, "flux_reactor_run: %s", strerror (errno)); - goto error; + goto mod_error; } - goto done; + goto mod_done; -error:; +mod_error:; return EXIT_FAILURE; -done:; +mod_done:; return EXIT_SUCCESS; } diff --git a/src/modules/dyad_ctx.h b/src/modules/dyad_ctx.h deleted file mode 100644 index 23a80eae..00000000 --- a/src/modules/dyad_ctx.h +++ /dev/null @@ -1,26 +0,0 @@ -/************************************************************\ - * Copyright 2021 Lawrence Livermore National Security, LLC - * (c.f. AUTHORS, NOTICE.LLNS, COPYING) - * - * This file is part of the Flux resource manager framework. - * For details, see https://github.com/flux-framework. 
- * - * SPDX-License-Identifier: LGPL-3.0 -\************************************************************/ - -#ifndef DYAD_MODULES_DYAD_CTX_H -#define DYAD_MODULES_DYAD_CTX_H - -#include -#include - -struct dyad_mod_ctx { - flux_t *h; - bool debug; - flux_msg_handler_t **handlers; - const char *dyad_path; -} dyad_mod_ctx_default = {NULL, false, NULL, NULL}; - -typedef struct dyad_mod_ctx dyad_mod_ctx_t; - -#endif // DYAD_MODULES_DYAD_CTX_H diff --git a/src/modules/dyad_flux_log.h b/src/modules/dyad_flux_log.h new file mode 100644 index 00000000..a8a24268 --- /dev/null +++ b/src/modules/dyad_flux_log.h @@ -0,0 +1,22 @@ +#ifndef __DYAD_MOD_DYAD_FLUX_LOG_H__ +#define __DYAD_MOD_DYAD_FLUX_LOG_H__ + +#include + +#if !defined(DYAD_LOGGING_ON) || (DYAD_LOGGING_ON == 0) +#define DYAD_LOG_INFO(dyad_ctx, ...) do {} while (0) +#define DYAD_LOG_ERR(dyad_ctx, ...) do {} while (0) +#define FLUX_LOG_INFO(flux_ctx, ...) do {} while (0) +#define FLUX_LOG_ERR(flux_ctx, ...) do {} while (0) +#else +#define DYAD_LOG_INFO(dyad_ctx, ...) flux_log (\ + dyad_ctx->h, LOG_INFO, __VA_ARGS__) +#define DYAD_LOG_ERR(dyad_ctx, ...) flux_log_error (\ + dyad_ctx->h, __VA_ARGS__) +#define FLUX_LOG_INFO(flux_ctx, ...) flux_log (\ + flux_ctx, LOG_INFO, __VA_ARGS__) +#define FLUX_LOG_ERR(flux_ctx, ...) 
flux_log_error (\ + flux_ctx, __VA_ARGS__) +#endif + +#endif /* __DYAD_MOD_DYAD_FLUX_LOG_H__ */ diff --git a/src/stream/Makefile.am b/src/stream/Makefile.am index 5d958519..22ac9ffe 100644 --- a/src/stream/Makefile.am +++ b/src/stream/Makefile.am @@ -1,9 +1,9 @@ lib_LTLIBRARIES = libdyad_fstream.la libdyad_fstream_la_SOURCES = dyad_stream_core.cpp -libdyad_fstream_la_LDFLAGS = $(AM_LDFLAGS) -avoid-version -no-undefined +libdyad_fstream_la_LDFLAGS = -Wl,-rpath,'$(UCX_LIBDIR)' $(AM_LDFLAGS) -avoid-version -no-undefined libdyad_fstream_la_LIBADD = $(top_builddir)/src/core/libdyad_core.la -libdyad_fstream_la_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/utils -I$(top_builddir)/src/core $(FLUX_CORE_CFLAGS) +libdyad_fstream_la_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_srcdir)/src/utils -I$(top_srcdir)/src/core $(FLUX_CORE_CFLAGS) if PERFFLOW libdyad_fstream_la_LIBADD += $(PERFFLOW_LIBS) libdyad_fstream_la_CPPFLAGS += $(PERFFLOW_PLUGIN_CPPFLAGS) $(PERFFLOW_CFLAGS) -DDYAD_PERFFLOW=1 diff --git a/src/stream/dyad_params.hpp b/src/stream/dyad_params.hpp index 062aa3cb..8e1cba00 100644 --- a/src/stream/dyad_params.hpp +++ b/src/stream/dyad_params.hpp @@ -24,6 +24,9 @@ struct dyad_params { unsigned int m_key_depth; /// The number of bins used in key hashing unsigned int m_key_bins; + /// The DTL to use to move data from producer to consumer + /// Valid values can be found in dyad_dtl_defs.h from core + int m_dtl_mode; /// The KVS namespace of the sharing context std::string m_kvs_namespace; @@ -38,6 +41,7 @@ struct dyad_params { m_shared_storage (false), m_key_depth (2u), m_key_bins (256u), + m_dtl_mode (0), m_kvs_namespace (""), m_cons_managed_path (""), m_prod_managed_path ("") diff --git a/src/stream/dyad_stream_core.cpp b/src/stream/dyad_stream_core.cpp index 3ea6eea1..9819bde2 100644 --- a/src/stream/dyad_stream_core.cpp +++ b/src/stream/dyad_stream_core.cpp @@ -64,67 +64,23 @@ void dyad_stream_core::init () { char *e = NULL; - bool debug = false; - bool check = false; - bool 
shared_storage = false; - unsigned int key_depth = 0; - unsigned int key_bins = 0; - const char *kvs_namespace = NULL; - const char *cons_managed_path = NULL; - const char *prod_managed_path = NULL; - if (m_initialized) { return; } - if ((e = getenv (DYAD_SYNC_DEBUG_ENV)) && (atoi (e) != 0)) { - debug = true; - enable_debug_dyad_utils (); - fprintf (stderr, "DYAD_WRAPPER: Initializeing DYAD wrapper\n"); - } else { - debug = false; - disable_debug_dyad_utils (); - } - - if ((e = getenv (DYAD_SHARED_STORAGE_ENV)) && (atoi (e) != 0)) - shared_storage = true; - else - shared_storage = false; - - if ((e = getenv (DYAD_KEY_DEPTH_ENV))) - key_depth = atoi (e); - else - key_depth = 2; - - if ((e = getenv (DYAD_KEY_BINS_ENV))) - key_bins = atoi (e); - else - key_bins = 256; - - if ((e = getenv (DYAD_KVS_NAMESPACE_ENV))) { - kvs_namespace = e; - } else { - kvs_namespace = NULL; - } - if ((e = getenv (DYAD_PATH_CONSUMER_ENV))) { - cons_managed_path = e; - m_is_cons = (strlen (cons_managed_path) != 0); + m_is_cons = (strlen (e) != 0); } else { - cons_managed_path = NULL; m_is_cons = false; } if ((e = getenv (DYAD_PATH_PRODUCER_ENV))) { - prod_managed_path = e; - m_is_prod = (strlen (prod_managed_path) != 0); + m_is_prod = (strlen (e) != 0); } else { - prod_managed_path = NULL; m_is_prod = false; } dyad_rc_t rc = - dyad_init (debug, check, shared_storage, key_depth, key_bins, - kvs_namespace, prod_managed_path, cons_managed_path, &m_ctx); + dyad_init_env (&m_ctx); // TODO figure out if we want to error if init fails m_initialized = true; @@ -138,7 +94,9 @@ void dyad_stream_core::init (const dyad_params &p) dyad_init (p.m_debug, false, p.m_shared_storage, p.m_key_depth, p.m_key_bins, p.m_kvs_namespace.c_str (), p.m_prod_managed_path.c_str (), - p.m_cons_managed_path.c_str (), &m_ctx); + p.m_cons_managed_path.c_str (), + static_cast(p.m_dtl_mode), + &m_ctx); // TODO figure out if we want to error if init fails m_initialized = true; log_info ("Stream core is initialized by 
parameters"); diff --git a/src/utils/Makefile.am b/src/utils/Makefile.am index 3fd842ad..2ea4011c 100644 --- a/src/utils/Makefile.am +++ b/src/utils/Makefile.am @@ -1,9 +1,11 @@ -SUBDIRS = libtap +SUBDIRS = libtap base64 noinst_LTLIBRARIES = libutils.la libmurmur3.la -libutils_la_SOURCES = utils.c utils.h dyad.h -libutils_la_CPPFLAGS = $(AM_CPPFLAGS) $(FLUX_CORE_CFLAGS) -#libutils_la_LIBADD = +libutils_la_SOURCES = utils.c utils.h read_all.c read_all.h +libutils_la_CPPFLAGS = \ + $(AM_CPPFLAGS) \ + $(FLUX_CORE_CFLAGS) +libutils_la_LIBADD = $(top_builddir)/src/utils/base64/libbase64.la $(FLUX_CORE_LIBS) libmurmur3_la_SOURCES = murmur3.c murmur3.h #libmurmur3_la_CPPFLAGS = $(AM_CPPFLAGS) #libmurmur3_la_LIBADD = diff --git a/src/utils/base64/Makefile.am b/src/utils/base64/Makefile.am new file mode 100644 index 00000000..4b11dfd8 --- /dev/null +++ b/src/utils/base64/Makefile.am @@ -0,0 +1,3 @@ +noinst_LTLIBRARIES = libbase64.la + +libbase64_la_SOURCES = base64.c base64.h diff --git a/src/utils/base64/base64.c b/src/utils/base64/base64.c new file mode 100644 index 00000000..bb60b331 --- /dev/null +++ b/src/utils/base64/base64.c @@ -0,0 +1,253 @@ +/* Licensed under BSD-MIT - see LICENSE file for details */ +#include "base64.h" + +#include +#include +#include +#include + +/** + * sixbit_to_b64 - maps a 6-bit value to the base64 alphabet + * @param map A base 64 map (see base64_init_map) + * @param sixbit Six-bit value to map + * @return a base 64 character + */ +static char sixbit_to_b64(const base64_maps_t *maps, const uint8_t sixbit) +{ + assert(sixbit <= 63); + + return maps->encode_map[(unsigned char)sixbit]; +} + +/** + * sixbit_from_b64 - maps a base64-alphabet character to its 6-bit value + * @param maps A base 64 maps structure (see base64_init_maps) + * @param sixbit Six-bit value to map + * @return a six-bit value + */ +static int8_t sixbit_from_b64(const base64_maps_t *maps, + const unsigned char b64letter) +{ + int8_t ret; + + ret = maps->decode_map[(unsigned 
char)b64letter]; + if (ret == (int8_t)0xff) { + errno = EDOM; + return -1; + } + + return ret; +} + +bool base64_char_in_alphabet(const base64_maps_t *maps, const char b64char) +{ + return (maps->decode_map[(const unsigned char)b64char] != (int8_t)0xff); +} + +void base64_init_maps(base64_maps_t *dest, const char src[64]) +{ + unsigned char i; + + memcpy(dest->encode_map,src,64); + memset(dest->decode_map,0xff,256); + for (i=0; i<64; i++) { + dest->decode_map[(unsigned char)src[i]] = i; + } +} + +size_t base64_encoded_length(size_t srclen) +{ + return ((srclen + 2) / 3) * 4; +} + +void base64_encode_triplet_using_maps(const base64_maps_t *maps, + char dest[4], const char src[3]) +{ + char a = src[0]; + char b = src[1]; + char c = src[2]; + + dest[0] = sixbit_to_b64(maps, (a & 0xfc) >> 2); + dest[1] = sixbit_to_b64(maps, ((a & 0x3) << 4) | ((b & 0xf0) >> 4)); + dest[2] = sixbit_to_b64(maps, ((c & 0xc0) >> 6) | ((b & 0xf) << 2)); + dest[3] = sixbit_to_b64(maps, c & 0x3f); +} + +void base64_encode_tail_using_maps(const base64_maps_t *maps, char dest[4], + const char *src, const size_t srclen) +{ + char longsrc[3] = { 0 }; + + assert(srclen <= 3); + + memcpy(longsrc, src, srclen); + base64_encode_triplet_using_maps(maps, dest, longsrc); + memset(dest+1+srclen, '=', 3-srclen); +} + +ssize_t base64_encode_using_maps(const base64_maps_t *maps, + char *dest, const size_t destlen, + const char *src, const size_t srclen) +{ + size_t src_offset = 0; + size_t dest_offset = 0; + + if (destlen < base64_encoded_length(srclen)) { + errno = EOVERFLOW; + return -1; + } + + while (srclen - src_offset >= 3) { + base64_encode_triplet_using_maps(maps, &dest[dest_offset], &src[src_offset]); + src_offset += 3; + dest_offset += 4; + } + + if (src_offset < srclen) { + base64_encode_tail_using_maps(maps, &dest[dest_offset], &src[src_offset], srclen-src_offset); + dest_offset += 4; + } + + memset(&dest[dest_offset], '\0', destlen-dest_offset); + + return dest_offset; +} + +size_t 
base64_decoded_length(size_t srclen) +{ + return ((srclen+3)/4*3); +} + +ssize_t base64_decode_quartet_using_maps(const base64_maps_t *maps, char dest[3], + const char src[4]) +{ + signed char a; + signed char b; + signed char c; + signed char d; + + a = sixbit_from_b64(maps, src[0]); + b = sixbit_from_b64(maps, src[1]); + c = sixbit_from_b64(maps, src[2]); + d = sixbit_from_b64(maps, src[3]); + + if ((a == -1) || (b == -1) || (c == -1) || (d == -1)) { + return -1; + } + + dest[0] = (a << 2) | (b >> 4); + dest[1] = ((b & 0xf) << 4) | (c >> 2); + dest[2] = ((c & 0x3) << 6) | d; + + return 0; +} + + +ssize_t base64_decode_tail_using_maps(const base64_maps_t *maps, char dest[3], + const char * src, const size_t srclen) +{ + char longsrc[4]; + int quartet_result; + size_t insize = srclen; + + while (insize != 0 && + src[insize-1] == '=') { /* throw away padding symbols */ + insize--; + } + if (insize == 0) { + return 0; + } + if (insize == 1) { + /* the input is malformed.... */ + errno = EINVAL; + return -1; + } + memcpy(longsrc, src, insize); + memset(longsrc+insize, 'A', 4-insize); + quartet_result = base64_decode_quartet_using_maps(maps, dest, longsrc); + if (quartet_result == -1) { + return -1; + } + + return insize - 1; +} + +ssize_t base64_decode_using_maps(const base64_maps_t *maps, + char *dest, const size_t destlen, + const char *src, const size_t srclen) +{ + ssize_t dest_offset = 0; + ssize_t i; + ssize_t more; + + if (destlen < base64_decoded_length(srclen)) { + errno = EOVERFLOW; + return -1; + } + + for(i=0; srclen - i > 4; i+=4) { + if (base64_decode_quartet_using_maps(maps, &dest[dest_offset], &src[i]) == -1) { + return -1; + } + dest_offset += 3; + } + + more = base64_decode_tail_using_maps(maps, &dest[dest_offset], &src[i], srclen - i); + if (more == -1) { + return -1; + } + dest_offset += more; + + memset(&dest[dest_offset], '\0', destlen-dest_offset); + + return dest_offset; +} + + + + +/** + * base64_maps_rfc4648 - pregenerated maps struct for 
rfc4648 + */ +const base64_maps_t base64_maps_rfc4648 = { + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", + + "\xff\xff\xff\xff\xff" /* 0 */ \ + "\xff\xff\xff\xff\xff" /* 5 */ \ + "\xff\xff\xff\xff\xff" /* 10 */ \ + "\xff\xff\xff\xff\xff" /* 15 */ \ + "\xff\xff\xff\xff\xff" /* 20 */ \ + "\xff\xff\xff\xff\xff" /* 25 */ \ + "\xff\xff\xff\xff\xff" /* 30 */ \ + "\xff\xff\xff\xff\xff" /* 35 */ \ + "\xff\xff\xff\x3e\xff" /* 40 */ \ + "\xff\xff\x3f\x34\x35" /* 45 */ \ + "\x36\x37\x38\x39\x3a" /* 50 */ \ + "\x3b\x3c\x3d\xff\xff" /* 55 */ \ + "\xff\xff\xff\xff\xff" /* 60 */ \ + "\x00\x01\x02\x03\x04" /* 65 A */ \ + "\x05\x06\x07\x08\x09" /* 70 */ \ + "\x0a\x0b\x0c\x0d\x0e" /* 75 */ \ + "\x0f\x10\x11\x12\x13" /* 80 */ \ + "\x14\x15\x16\x17\x18" /* 85 */ \ + "\x19\xff\xff\xff\xff" /* 90 */ \ + "\xff\xff\x1a\x1b\x1c" /* 95 */ \ + "\x1d\x1e\x1f\x20\x21" /* 100 */ \ + "\x22\x23\x24\x25\x26" /* 105 */ \ + "\x27\x28\x29\x2a\x2b" /* 110 */ \ + "\x2c\x2d\x2e\x2f\x30" /* 115 */ \ + "\x31\x32\x33\xff\xff" /* 120 */ \ + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" /* 125 */ \ + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" \ + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" \ + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" /* 155 */ \ + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" \ + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" \ + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" /* 185 */ \ + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" \ + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" \ + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" /* 215 */ \ + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" \ + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" \ + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" /* 245 */ +}; diff --git a/src/utils/base64/base64.h b/src/utils/base64/base64.h new file mode 100644 index 00000000..a899af4a --- /dev/null +++ b/src/utils/base64/base64.h @@ -0,0 +1,241 @@ +/* Licensed under BSD-MIT - see LICENSE file for details */ +#ifndef CCAN_BASE64_H +#define CCAN_BASE64_H + +#include 
+#include +#include + +/** + * base64_maps_t - structure to hold maps for encode/decode + */ +typedef struct { + char encode_map[64]; + signed char decode_map[256]; +} base64_maps_t; + +/** + * base64_encoded_length - Calculate encode buffer length + * @param srclen the size of the data to be encoded + * @note add 1 to this to get null-termination + * @return Buffer length required for encode + */ +size_t base64_encoded_length(size_t srclen); + +/** + * base64_decoded_length - Calculate decode buffer length + * @param srclen Length of the data to be decoded + * @note This does not return the size of the decoded data! see base64_decode + * @return Minimum buffer length for safe decode + */ +size_t base64_decoded_length(size_t srclen); + +/** + * base64_init_maps - populate a base64_maps_t based on a supplied alphabet + * @param dest A base64 maps object + * @param src Alphabet to populate the maps from (e.g. base64_alphabet_rfc4648) + */ +void base64_init_maps(base64_maps_t *dest, const char src[64]); + + +/** + * base64_encode_triplet_using_maps - encode 3 bytes into base64 using a specific alphabet + * @param maps Maps to use for encoding (see base64_init_maps) + * @param dest Buffer containing 4 bytes + * @param src Buffer containing 3 bytes + */ +void base64_encode_triplet_using_maps(const base64_maps_t *maps, + char dest[4], const char src[3]); + +/** + * base64_encode_tail_using_maps - encode the final bytes of a source using a specific alphabet + * @param maps Maps to use for encoding (see base64_init_maps) + * @param dest Buffer containing 4 bytes + * @param src Buffer containing srclen bytes + * @param srclen Number of bytes (<= 3) to encode in src + */ +void base64_encode_tail_using_maps(const base64_maps_t *maps, char dest[4], + const char *src, size_t srclen); + +/** + * base64_encode_using_maps - encode a buffer into base64 using a specific alphabet + * @param maps Maps to use for encoding (see base64_init_maps) + * @param dest Buffer to encode
into + * @param destlen Length of dest + * @param src Buffer to encode + * @param srclen Length of the data to encode + * @return Number of encoded bytes set in dest. -1 on error (and errno set) + * @note dest will be nul-padded to destlen (past any required padding) + * @note sets errno = EOVERFLOW if destlen is too small + */ +ssize_t base64_encode_using_maps(const base64_maps_t *maps, + char *dest, size_t destlen, + const char *src, size_t srclen); + +/* + * base64_char_in_alphabet - returns true if character can be part of an encoded string + * @param maps A base64 maps object (see base64_init_maps) + * @param b64char Character to check + */ +bool base64_char_in_alphabet(const base64_maps_t *maps, char b64char); + +/** + * base64_decode_using_maps - decode a base64-encoded string using a specific alphabet + * @param maps A base64 maps object (see base64_init_maps) + * @param dest Buffer to decode into + * @param destlen length of dest + * @param src the buffer to decode + * @param srclen the length of the data to decode + * @return Number of decoded bytes set in dest. -1 on error (and errno set) + * @note dest will be nul-padded to destlen + * @note sets errno = EOVERFLOW if destlen is too small + * @note sets errno = EDOM if src contains invalid characters + */ +ssize_t base64_decode_using_maps(const base64_maps_t *maps, + char *dest, size_t destlen, + const char *src, size_t srclen); + +/** + * base64_decode_quartet_using_maps - decode 4 bytes from base64 using a specific alphabet + * @param maps A base64 maps object (see base64_init_maps) + * @param dest Buffer containing 3 bytes + * @param src Buffer containing 4 bytes + * @return Number of decoded bytes set in dest. 
-1 on error (and errno set) + * @note sets errno = EDOM if src contains invalid characters + */ +ssize_t base64_decode_quartet_using_maps(const base64_maps_t *maps, + char dest[3], const char src[4]); + +/** + * base64_decode_tail_using_maps - decode the final bytes of a base64 string using a specific alphabet + * @param maps A base64 maps object (see base64_init_maps) + * @param dest Buffer containing 3 bytes + * @param src Buffer containing 4 bytes - padded with '=' as required + * @param srclen Number of bytes to decode in src + * @return Number of decoded bytes set in dest. -1 on error (and errno set) + * @note sets errno = EDOM if src contains invalid characters + * @note sets errno = EINVAL if src is an invalid base64 tail + */ +ssize_t base64_decode_tail_using_maps(const base64_maps_t *maps, char dest[3], + const char *src, size_t srclen); + + +/* the rfc4648 functions: */ + +extern const base64_maps_t base64_maps_rfc4648; + +/** + * base64_encode - Encode a buffer into base64 according to rfc4648 + * @param dest Buffer to encode into + * @param destlen Length of the destination buffer + * @param src Buffer to encode + * @param srclen Length of the data to encode + * @return Number of encoded bytes set in dest. 
-1 on error (and errno set) + * @note dest will be nul-padded to destlen (past any required padding) + * @note sets errno = EOVERFLOW if destlen is too small + * + * This function encodes src according to http://tools.ietf.org/html/rfc4648 + * + * Example: + * size_t encoded_length; + * char dest[100]; + * const char *src = "This string gets encoded"; + * encoded_length = base64_encode(dest, sizeof(dest), src, strlen(src)); + * printf("Returned data of length %zd @%p\n", encoded_length, &dest); + */ +static inline +ssize_t base64_encode(char *dest, size_t destlen, + const char *src, size_t srclen) +{ + return base64_encode_using_maps(&base64_maps_rfc4648, + dest, destlen, src, srclen); +} + +/** + * base64_encode_triplet - encode 3 bytes into base64 according to rfc4648 + * @param dest Buffer containing 4 bytes + * @param src Buffer containing 3 bytes + */ +static inline +void base64_encode_triplet(char dest[4], const char src[3]) +{ + base64_encode_triplet_using_maps(&base64_maps_rfc4648, dest, src); +} + +/** + * base64_encode_tail - encode the final bytes of a source according to rfc4648 + * @param dest Buffer containing 4 bytes + * @param src Buffer containing srclen bytes + * @param srclen Number of bytes (<= 3) to encode in src + */ +static inline +void base64_encode_tail(char dest[4], const char *src, size_t srclen) +{ + base64_encode_tail_using_maps(&base64_maps_rfc4648, dest, src, srclen); +} + + +/** + * base64_decode - decode An rfc4648 base64-encoded string + * @param dest Buffer to decode into + * @param destlen Length of the destination buffer + * @param src Buffer to decode + * @param srclen Length of the data to decode + * @return Number of decoded bytes set in dest. 
-1 on error (and errno set) + * @note dest will be nul-padded to destlen + * @note sets errno = EOVERFLOW if destlen is too small + * @note sets errno = EDOM if src contains invalid characters + * + * This function decodes the buffer according to + * http://tools.ietf.org/html/rfc4648 + * + * Example: + * size_t decoded_length; + * char ret[100]; + * const char *src = "Zm9vYmFyYmF6"; + * decoded_length = base64_decode(ret, sizeof(ret), src, strlen(src)); + * printf("Returned data of length %zd @%p\n", decoded_length, &ret); + */ +static inline +ssize_t base64_decode(char *dest, size_t destlen, + const char *src, size_t srclen) +{ + return base64_decode_using_maps(&base64_maps_rfc4648, + dest, destlen, src, srclen); +} + +/** + * base64_decode_quartet - decode the first 4 characters in src into dest + * @param dest Buffer containing 3 bytes + * @param src Buffer containing 4 characters + * @return Number of decoded bytes set in dest. -1 on error (and errno set) + * @note sets errno = EDOM if src contains invalid characters + */ +static inline +ssize_t base64_decode_quartet(char dest[3], const char src[4]) +{ + return base64_decode_quartet_using_maps(&base64_maps_rfc4648, + dest, src); +} + +/** + * @brief decode the final bytes of a base64 string from src into dest + * @param dest Buffer containing 3 bytes + * @param src Buffer containing 4 bytes - padded with '=' as required + * @param srclen Number of bytes to decode in src + * @return Number of decoded bytes set in dest. 
-1 on error (and errno set) + * @note sets errno = EDOM if src contains invalid characters + * @note sets errno = EINVAL if src is an invalid base64 tail + */ +static inline +ssize_t base64_decode_tail(char dest[3], const char *src, size_t srclen) +{ + return base64_decode_tail_using_maps(&base64_maps_rfc4648, + dest, src, srclen); +} + +/* end rfc4648 functions */ + + + +#endif /* CCAN_BASE64_H */ diff --git a/src/utils/base64/license b/src/utils/base64/license new file mode 100644 index 00000000..89de3547 --- /dev/null +++ b/src/utils/base64/license @@ -0,0 +1,17 @@ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/src/modules/read_all.c b/src/utils/read_all.c similarity index 100% rename from src/modules/read_all.c rename to src/utils/read_all.c diff --git a/src/modules/read_all.h b/src/utils/read_all.h similarity index 82% rename from src/modules/read_all.h rename to src/utils/read_all.h index 5564fd01..bd1875ec 100644 --- a/src/modules/read_all.h +++ b/src/utils/read_all.h @@ -8,8 +8,8 @@ * SPDX-License-Identifier: LGPL-3.0 \************************************************************/ -#ifndef DYAD_MODULES_READ_ALL_H -#define DYAD_MODULES_READ_ALL_H +#ifndef UTILS_READ_ALL_H +#define UTILS_READ_ALL_H #include @@ -22,11 +22,10 @@ ssize_t write_all (int fd, const void *buf, size_t len); #if DYAD_PERFFLOW __attribute__ ((annotate ("@critical_path()"))) #endif -ssize_t -read_all (int fd, void **bufp); +ssize_t read_all (int fd, void **bufp); #if defined(__cplusplus) }; #endif // defined(__cplusplus) -#endif /* !DYAD_MODULES_READ_ALL_H */ +#endif /* UTILS_READ_ALL_H */ diff --git a/src/utils/utils.c b/src/utils/utils.c index d8fd80aa..744e52f7 100644 --- a/src/utils/utils.c +++ b/src/utils/utils.c @@ -230,6 +230,7 @@ bool cmp_canonical_path_prefix (const char* __restrict__ prefix, char* __restrict__ upath, const size_t upath_capacity) { + printf ("Checking if there is overlap between prefix and upath\n"); { const char* const upath_end = upath + upath_capacity; bool no_overlap = @@ -243,10 +244,12 @@ bool cmp_canonical_path_prefix (const char* __restrict__ prefix, } // Only works when there are no multiple absolute paths via hardlinks + printf ("Create tmp strings on stack\n"); char can_prefix[PATH_MAX] = {'\0'}; // canonical form of the managed path char can_path[PATH_MAX] = {'\0'}; // canonical form of the given path // The path prefix needs to translate to a real path + printf ("Calling realpath for prefix\n"); if (!realpath (prefix, can_prefix)) { IPRINTF ("DYAD UTIL: error in realpath for %s\n", prefix); IPRINTF ("DYAD UTIL: %s\n", strerror (errno)); @@ 
-258,25 +261,32 @@ bool cmp_canonical_path_prefix (const char* __restrict__ prefix, // See if the prefix of the path in question matches that of either the // dyad managed path or its canonical form when the path is not a real one. + printf ("Calling realpath for path\n"); if (!realpath (path, can_path)) { IPRINTF ("DYAD UTIL: %s is NOT a realpath.\n", path); + printf ("Since path isn't a realpath, calling cmp_prefix\n"); if (!cmp_prefix (prefix, path, DYAD_PATH_DELIM, &upath_len)) { match = cmp_prefix (can_prefix, path, DYAD_PATH_DELIM, &upath_len); } else { match = true; } + printf ("Calling extract_user_path\n"); extract_user_path (path, upath, upath_len); return match; } // For a real path, see if the prefix of either the path or its canonical // form matches the managed path. + printf ("Calling cmp_prefix on can_prefix and can_path\n"); match = cmp_prefix (can_prefix, can_path, DYAD_PATH_DELIM, &upath_len); if (upath_len + 1 > upath_capacity) { + printf ("upath_len too big for upath_capacity\n"); return false; } + printf ("Calling extract_user_path\n"); extract_user_path (can_path, upath, upath_len); + printf ("Finished with cmp_canonical_path_prefix\n"); return match; } diff --git a/src/wrapper/Makefile.am b/src/wrapper/Makefile.am index 94e212de..ee02ecad 100644 --- a/src/wrapper/Makefile.am +++ b/src/wrapper/Makefile.am @@ -1,17 +1,15 @@ lib_LTLIBRARIES = dyad_wrapper.la dyad_wrapper_la_SOURCES = wrapper.c dyad_wrapper_la_LDFLAGS = \ + -Wl,-rpath,'$(UCX_LIBDIR)' \ $(AM_LDFLAGS) \ -module \ -avoid-version \ -no-undefined \ -shared \ -export-symbols wrapper.sym -dyad_wrapper_la_LIBADD = \ - $(top_builddir)/src/core/libdyad_core.la \ - # Need to add libutils.la now that libdyad_core is not exporting utils' symbols - $(top_builddir)/src/utils/libutils.la -dyad_wrapper_la_CPPFLAGS = -I$(top_builddir)/src/utils -I$(top_builddir)/src/core $(FLUX_CORE_CFLAGS) +dyad_wrapper_la_LIBADD = $(top_builddir)/src/core/libdyad_core.la +dyad_wrapper_la_CPPFLAGS = 
-I$(top_srcdir)/src/utils -I$(top_srcdir)/src/core $(FLUX_CORE_CFLAGS) install-exec-hook: @(cd $(DESTDIR)$(libdir) && $(RM) dyad_wrapper.la) diff --git a/src/wrapper/wrapper.c b/src/wrapper/wrapper.c index 8d7d2597..e41bd66d 100644 --- a/src/wrapper/wrapper.c +++ b/src/wrapper/wrapper.c @@ -86,68 +86,15 @@ static inline int is_wronly (int fd) void dyad_wrapper_init (void) { - char *e = NULL; - - bool debug = false; - bool check = false; - bool shared_storage = false; - unsigned int key_depth = 0; - unsigned int key_bins = 0; - char *kvs_namespace = NULL; - char *prod_managed_path = NULL; - char *cons_managed_path = NULL; dyad_rc_t rc = DYAD_RC_OK; - if ((e = getenv (DYAD_SYNC_DEBUG_ENV))) { - debug = true; - enable_debug_dyad_utils (); - fprintf (stderr, "DYAD_WRAPPER: Initializeing DYAD wrapper\n"); - } else { - debug = false; - disable_debug_dyad_utils (); - } - - if ((e = getenv (DYAD_SYNC_CHECK_ENV))) - check = true; - else - check = false; - - if ((e = getenv (DYAD_SHARED_STORAGE_ENV)) && (atoi (e) != 0)) - shared_storage = true; - else - shared_storage = false; - - if ((e = getenv (DYAD_KEY_DEPTH_ENV))) - key_depth = atoi (e); - else - key_depth = 2; - - if ((e = getenv (DYAD_KEY_BINS_ENV))) - key_bins = atoi (e); - else - key_bins = 256; - - if ((e = getenv (DYAD_KVS_NAMESPACE_ENV))) - kvs_namespace = e; - else - kvs_namespace = NULL; - - if ((e = getenv (DYAD_PATH_CONSUMER_ENV))) { - cons_managed_path = e; - } else { - cons_managed_path = NULL; - } - if ((e = getenv (DYAD_PATH_PRODUCER_ENV))) { - prod_managed_path = e; - } else { - prod_managed_path = NULL; - } - - rc = dyad_init (debug, check, shared_storage, key_depth, key_bins, - kvs_namespace, prod_managed_path, cons_managed_path, &ctx); + rc = dyad_init_env (&ctx); if (DYAD_IS_ERROR (rc)) { - DYAD_LOG_ERR (ctx, "Could not initialize DYAD!\n"); + fprintf(stderr, "Failed to initialize DYAD (code = %d)\n", rc); + if (ctx != NULL) { + dyad_wrapper_fini(); + } ctx = NULL; return; } From 
8f3a2c4cb59415304e1147dbd92d7c2b5df30bce Mon Sep 17 00:00:00 2001 From: ilumsden Date: Mon, 1 May 2023 21:57:19 -0400 Subject: [PATCH 02/18] Modifies the wrapper library so that DYAD is initialized on library load again --- src/core/dtl/ucx_dtl.c | 25 +++----- src/core/dyad_core.c | 105 ++++------------------------------ src/modules/dtl/ucx_mod_dtl.c | 4 +- src/modules/dyad.c | 18 +++--- src/utils/utils.c | 10 ---- src/wrapper/wrapper.c | 32 +++++------ 6 files changed, 45 insertions(+), 149 deletions(-) diff --git a/src/core/dtl/ucx_dtl.c b/src/core/dtl/ucx_dtl.c index 37b98bf8..65580a4e 100644 --- a/src/core/dtl/ucx_dtl.c +++ b/src/core/dtl/ucx_dtl.c @@ -107,7 +107,6 @@ dyad_rc_t dyad_dtl_ucx_init(flux_t *h, const char *kvs_namespace, ucs_status_t status; ucp_worker_attr_t worker_attrs; - printf ("Allocating UCX DTL handle\n"); *dtl_handle = malloc(sizeof(struct dyad_dtl_ucx)); if (*dtl_handle == NULL) { @@ -127,7 +126,6 @@ dyad_rc_t dyad_dtl_ucx_init(flux_t *h, const char *kvs_namespace, // Read the UCX configuration FLUX_LOG_INFO ((*dtl_handle)->h, "Reading UCP config\n"); - printf ("Calling ucp_config_read\n"); status = ucp_config_read (NULL, NULL, &config); if (UCX_STATUS_FAIL(status)) { @@ -148,20 +146,18 @@ dyad_rc_t dyad_dtl_ucx_init(flux_t *h, const char *kvs_namespace, UCP_PARAM_FIELD_REQUEST_SIZE | UCP_PARAM_FIELD_REQUEST_INIT; ucx_params.features = UCP_FEATURE_TAG | - UCP_FEATURE_RMA | + // UCP_FEATURE_RMA | UCP_FEATURE_WAKEUP; ucx_params.request_size = sizeof(struct ucx_request); ucx_params.request_init = dyad_ucx_request_init; // Initialize UCX FLUX_LOG_INFO ((*dtl_handle)->h, "Initializing UCP\n"); - printf ("Calling ucp_init\n"); status = ucp_init(&ucx_params, config, &(*dtl_handle)->ucx_ctx); // If in debug mode, print the configuration of UCX to stderr if (debug) { - printf ("Calling ucp_config_print\n"); ucp_config_print( config, stderr, @@ -170,7 +166,6 @@ dyad_rc_t dyad_dtl_ucx_init(flux_t *h, const char *kvs_namespace, ); } // Release the 
config - printf ("Calling ucp_config_release\n"); ucp_config_release(config); // Log an error if UCX initialization failed if (UCX_STATUS_FAIL(status)) @@ -192,7 +187,6 @@ dyad_rc_t dyad_dtl_ucx_init(flux_t *h, const char *kvs_namespace, // Create the worker and log an error if that fails FLUX_LOG_INFO ((*dtl_handle)->h, "Creating UCP worker\n"); - printf ("Calling ucp_worker_create\n"); status = ucp_worker_create( (*dtl_handle)->ucx_ctx, &worker_params, @@ -207,7 +201,6 @@ dyad_rc_t dyad_dtl_ucx_init(flux_t *h, const char *kvs_namespace, // Query the worker for its address worker_attrs.field_mask = UCP_WORKER_ATTR_FIELD_ADDRESS; FLUX_LOG_INFO ((*dtl_handle)->h, "Get address of UCP worker\n"); - printf ("Calling ucp_worker_query\n"); status = ucp_worker_query( (*dtl_handle)->ucx_worker, &worker_attrs @@ -321,11 +314,9 @@ dyad_rc_t dyad_dtl_ucx_recv(dyad_dtl_ucx_t *dtl_handle, dyad_ucx_request_t* req = NULL; // Use 'ucp_worker_wait' to poll the worker until // the tag recv event that we're looking for comes in. - FLUX_LOG_INFO (dtl_handle->h, "Starting UCP polling for incoming data\n"); + FLUX_LOG_INFO (dtl_handle->h, "Poll UCP for incoming data\n"); do { - FLUX_LOG_INFO (dtl_handle->h, "Progress UCP worker\n"); ucp_worker_progress (dtl_handle->ucx_worker); - FLUX_LOG_INFO (dtl_handle->h, "Probe the UCP worker for messages on tag %lu\n", dtl_handle->comm_tag); msg = ucp_tag_probe_nb( dtl_handle->ucx_worker, dtl_handle->comm_tag, @@ -336,6 +327,10 @@ dyad_rc_t dyad_dtl_ucx_recv(dyad_dtl_ucx_t *dtl_handle, &msg_info ); } while (msg == NULL); + // TODO: This version of the polling code is not supposed to spin-lock, unlike the code above. + // Currently, it does not work. Once it starts working, we can replace the code above + // with a version of this code. 
+ // // while (true) // { // // Probe the tag recv event at the top @@ -456,14 +451,12 @@ dyad_rc_t dyad_dtl_ucx_finalize(dyad_dtl_ucx_t *dtl_handle) if (dtl_handle != NULL) { FLUX_LOG_INFO (dtl_handle->h, "Finalizing UCX DTL\n"); - FLUX_LOG_INFO (dtl_handle->h, "Releasing KVS Namespace\n"); // KVS namespace string should be released by the // DYAD context, so it is not released here dtl_handle->kvs_namespace = NULL; // Release consumer address if not already released if (dtl_handle->consumer_address != NULL) { - FLUX_LOG_INFO (dtl_handle->h, "Releasing worker address\n"); ucp_worker_release_address( dtl_handle->ucx_worker, dtl_handle->consumer_address @@ -472,20 +465,16 @@ dyad_rc_t dyad_dtl_ucx_finalize(dyad_dtl_ucx_t *dtl_handle) } // Release worker if not already released if (dtl_handle->ucx_worker != NULL) - { - FLUX_LOG_INFO (dtl_handle->h, "Releasing worker\n"); + { ucp_worker_destroy(dtl_handle->ucx_worker); dtl_handle->ucx_worker = NULL; - FLUX_LOG_INFO (dtl_handle->h, "Worker released\n"); } // Release context if not already released if (dtl_handle->ucx_ctx != NULL) { - FLUX_LOG_INFO (dtl_handle->h, "Releasing context\n"); ucp_cleanup(dtl_handle->ucx_ctx); dtl_handle->ucx_ctx = NULL; } - FLUX_LOG_INFO (dtl_handle->h, "Releasing Flux handle\n"); // Flux handle should be released by the // DYAD context, so it is not released here dtl_handle->h = NULL; diff --git a/src/core/dyad_core.c b/src/core/dyad_core.c index d53ce570..b7fed5b3 100644 --- a/src/core/dyad_core.c +++ b/src/core/dyad_core.c @@ -203,14 +203,12 @@ static inline dyad_rc_t dyad_kvs_lookup (const dyad_ctx_t* ctx, flux_future_t** f) #endif { - printf ("In dyad_kvs_lookup\n"); dyad_rc_t rc = DYAD_RC_OK; // Lookup information about the desired file (represented by kvs_topic) // from the Flux KVS. 
If there is no information, wait for it to be // made available DYAD_LOG_INFO (ctx, "Retrieving information from KVS under the key %s\n", kvs_topic); - printf ("Calling flux_kvs_lookup\n"); *f = flux_kvs_lookup (ctx->h, ctx->kvs_namespace, FLUX_KVS_WAITCREATE, kvs_topic); // If the KVS lookup failed, log an error and return DYAD_BADLOOKUP @@ -220,14 +218,12 @@ static inline dyad_rc_t dyad_kvs_lookup (const dyad_ctx_t* ctx, } // Extract the rank of the producer from the KVS response DYAD_LOG_INFO (ctx, "Retrieving owner rank from KVS entry\n"); - printf ("Unpacking producer rank from KVS\n"); rc = flux_kvs_lookup_get_unpack (*f, "i", owner_rank); // If the extraction did not work, log an error and return DYAD_BADFETCH if (rc < 0) { DYAD_LOG_ERR (ctx, "Could not unpack owner's rank from KVS response\n"); return DYAD_RC_BADFETCH; } - printf ("Finished with dyad_kvs_lookup\n"); return DYAD_RC_OK; } @@ -242,7 +238,6 @@ static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, dyad_kvs_response_t** restrict resp) #endif { - printf ("In dyad_fetch\n"); dyad_rc_t rc = DYAD_RC_OK; char upath[PATH_MAX]; uint32_t owner_rank = 0; @@ -254,13 +249,10 @@ static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, // Extract the path to the file specified by fname relative to the // consumer-managed path // This relative path will be stored in upath - printf ("Checking if %s is in consumer path\n", fname); if (!cmp_canonical_path_prefix (ctx->cons_managed_path, fname, upath, PATH_MAX)) { - printf ("%s not in consumer path, so exiting\n"); DYAD_LOG_INFO (ctx, "%s is not in the Consumer's managed path\n", fname); - printf ("Done with dyad_fetch\n"); return DYAD_RC_OK; } DYAD_LOG_INFO (ctx, @@ -268,12 +260,10 @@ static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, upath); // Generate the KVS key from the file path relative to // the consumer-managed directory - printf ("Generate KVS key\n"); gen_path_key (upath, topic, topic_len, ctx->key_depth, 
ctx->key_bins); DYAD_LOG_INFO (ctx, "Generated KVS key for consumer: %s\n", topic); // Call dyad_kvs_lookup to retrieve infromation about the file // from the Flux KVS - printf ("Calling dyad_kvs_lookup\n"); rc = dyad_kvs_lookup (ctx, topic, &owner_rank, &f); // If an error occured in dyad_kvs_lookup, log it and propagate the return // code @@ -288,7 +278,6 @@ static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, // object, and return DYAD_OK. This will cause the file transfer step to be // skipped if (ctx->shared_storage || (owner_rank == ctx->rank)) { - printf ("Either shared_storage is enabled or we are on the producer node. So skip\n"); DYAD_LOG_INFO (ctx, "Either shared-storage is enabled or the producer rank " "is the " @@ -302,14 +291,12 @@ static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, // return code DYAD_LOG_INFO (ctx, "Creating KVS response object to store retrieved data\n"); - printf ("Allocating KVS response struct\n"); *resp = malloc (sizeof (struct dyad_kvs_response)); if (*resp == NULL) { DYAD_LOG_ERR (ctx, "Cannot allocate a dyad_kvs_response_t object!\n"); rc = DYAD_RC_BADRESPONSE; goto fetch_done; } - printf ("Allocating space for upath (%s)\n", upath); (*resp)->fpath = malloc (strlen (upath) + 1); if ((*resp)->fpath == NULL) { @@ -318,41 +305,18 @@ static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, rc = DYAD_RC_BADRESPONSE; goto fetch_done; } - printf ("Filling KVS response\n"); strncpy ((*resp)->fpath, upath, strlen (upath) + 1); (*resp)->owner_rank = owner_rank; rc = DYAD_RC_OK; fetch_done:; // Destroy the Flux future if needed if (f != NULL) { - printf ("Destroying future\n"); flux_future_destroy (f); f = NULL; } - printf ("Finished with dyad_fetch\n"); return rc; } -// static inline dyad_rc_t process_remaining_rpc_msgs (const dyad_ctx_t* ctx, flux_future_t* f) -// { -// DYAD_LOG_INFO (ctx, "In process_remaining_rpc_msgs\n"); -// int rc = 0; -// while (true) { -// if ((rc = flux_rpc_get (f, 
NULL)) < 0) { -// DYAD_LOG_INFO(ctx, "flux_rpc_get returned < 0 (rc = %d)\n", rc); -// if (errno == ENODATA) { -// DYAD_LOG_INFO (ctx, "Reached end of RPC stream from module"); -// return DYAD_RC_OK; -// } else { -// DYAD_LOG_ERR (ctx, "An error occured in the DYAD module\n"); -// return DYAD_RC_BADRPC; -// } -// } -// DYAD_LOG_INFO(ctx, "flux_rpc_get returned >= 0 (rc = %d)\n", rc); -// flux_future_reset (f); -// } -// } - #if DYAD_PERFFLOW __attribute__ ((annotate ("@critical_path()"))) static dyad_rc_t dyad_get_data ( @@ -368,13 +332,11 @@ static inline dyad_rc_t dyad_get_data ( size_t* file_len) #endif { - printf ("In dyad_get_data\n"); dyad_rc_t rc = DYAD_RC_OK; dyad_rc_t final_rc = DYAD_RC_OK; flux_future_t *f; json_t* rpc_payload; DYAD_LOG_INFO (ctx, "Packing payload for RPC to DYAD module"); - printf ("Calling dyad_dtl_rpc_pack\n"); rc = dyad_dtl_rpc_pack ( ctx->dtl_handle, kvs_data->fpath, @@ -387,7 +349,6 @@ static inline dyad_rc_t dyad_get_data ( goto get_done; } DYAD_LOG_INFO (ctx, "Sending payload for RPC to DYAD module"); - printf ("Calling flux_rpc_pack\n"); f = flux_rpc_pack ( ctx->h, "dyad.fetch", @@ -403,7 +364,6 @@ static inline dyad_rc_t dyad_get_data ( goto get_done; } DYAD_LOG_INFO (ctx, "Receive RPC response from DYAD module"); - printf ("Calling dyad_dtl_recv_rpc_response\n"); rc = dyad_dtl_recv_rpc_response(ctx->dtl_handle, f); if (DYAD_IS_ERROR(rc)) { @@ -411,7 +371,6 @@ static inline dyad_rc_t dyad_get_data ( goto get_done; } DYAD_LOG_INFO (ctx, "Establish DTL connection with DYAD module"); - printf ("Calling dyad_dtl_establish_connection\n"); rc = dyad_dtl_establish_connection ( ctx->dtl_handle ); @@ -420,14 +379,12 @@ static inline dyad_rc_t dyad_get_data ( goto get_done; } DYAD_LOG_INFO (ctx, "Receive file data via DTL"); - printf ("Calling dyad_dtl_recv\n"); rc = dyad_dtl_recv ( ctx->dtl_handle, (void**) file_data, file_len ); DYAD_LOG_INFO (ctx, "Close DTL connection with DYAD module"); - printf ("Calling 
dyad_dtl_close_connection\n"); dyad_dtl_close_connection (ctx->dtl_handle); if (DYAD_IS_ERROR(rc)) { @@ -446,22 +403,15 @@ get_done:; // If we do not have either of these cases, we will wait for one more RPC message. // If everything went well in the module, this last message will set errno to ENODATA (i.e., end of stream). // Otherwise, something went wrong, so we'll return DYAD_RC_BADRPC. + DYAD_LOG_INFO (ctx, "Wait for end-of-stream message from module\n"); if (rc != DYAD_RC_RPC_FINISHED && rc != DYAD_RC_BADRPC) { - printf ("Waiting for end-of-stream message from module\n"); - DYAD_LOG_INFO (ctx, "Wait for end-of-stream message from module\n"); - if (flux_rpc_get (f, NULL) < 0 && errno == ENODATA) { - printf ("Wait completed successfully\n"); - DYAD_LOG_ERR (ctx, "Received end-of-stream message (ENODATA) from module\n"); - } else { + if (!(flux_rpc_get (f, NULL) < 0 && errno == ENODATA)) { DYAD_LOG_ERR (ctx, "An error occured at end of getting data! Either the module sent too many responses, or the module failed with a bad error (errno = %d)\n", errno); - printf ("Wait completed with error\n"); rc = DYAD_RC_BADRPC; } } DYAD_LOG_INFO (ctx, "Destroy the Flux future for the RPC\n"); - printf ("Destroying future\n"); flux_future_destroy (f); - printf ("Finished with dyad_get_data\n"); return rc; } @@ -474,7 +424,6 @@ static inline dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, const dyad_kvs_response_t* restrict kvs_data) #endif { - printf ("In dyad_pull\n"); dyad_rc_t rc = DYAD_RC_OK; const char* file_data = NULL; size_t file_len = 0; @@ -489,14 +438,12 @@ static inline dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, // Call dyad_get_data to dispatch a RPC to the producer's Flux broker // and retrieve the data associated with the file - printf ("Calling dyad_get_data\n"); rc = dyad_get_data (ctx, kvs_data, &file_data, &file_len); if (DYAD_IS_ERROR(rc)) { goto pull_done; } // Build the full path to the file being consumed - printf ("Build the full path to 
the file\n"); strncpy (file_path, ctx->cons_managed_path, PATH_MAX - 1); concat_str (file_path, kvs_data->fpath, "/", PATH_MAX); strncpy (file_path_copy, file_path, PATH_MAX); // dirname modifies the arg @@ -504,7 +451,6 @@ static inline dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, DYAD_LOG_INFO (ctx, "Saving retrieved data to %s\n", file_path); // Create the directory as needed // TODO: Need to be consistent with the mode at the source - printf ("Creating directories as needed\n"); odir = dirname (file_path_copy); if ((strncmp (odir, ".", strlen (".")) != 0) && (mkdir_as_needed (odir, m) < 0)) { @@ -516,21 +462,18 @@ static inline dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, } // Write the file contents to the location specified by the user - printf ("Openning file (%s)\n", file_path); of = fopen (file_path, "w"); if (of == NULL) { DYAD_LOG_ERR (ctx, "Cannot open file %s\n", file_path); rc = DYAD_RC_BADFIO; goto pull_done; } - printf ("Saving file (%s)\n", file_path); written_len = fwrite (file_data, sizeof (char), (size_t)file_len, of); if (written_len != (size_t)file_len) { DYAD_LOG_ERR (ctx, "fwrite of pulled file failed!\n"); rc = DYAD_RC_BADFIO; goto pull_done; } - printf ("Closing file (%s)\n", file_path); rc = fclose (of); if (rc != 0) { @@ -541,14 +484,12 @@ static inline dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, pull_done: if (file_data != NULL) { - printf ("Freeing file data\n"); free(file_data); } // If "check" is set and the operation was successful, set the // DYAD_CHECK_ENV environment variable to "ok" if (rc == DYAD_RC_OK && (ctx && ctx->check)) setenv (DYAD_CHECK_ENV, "ok", 1); - printf ("Finished with dyad_pull\n"); return rc; } @@ -563,9 +504,7 @@ dyad_rc_t dyad_init (bool debug, dyad_dtl_mode_t dtl_mode, dyad_ctx_t** ctx) { - printf ("Initializing DYAD!\n"); dyad_rc_t rc = DYAD_RC_OK; - printf ("Checking if ctx is NULL\n"); // If ctx is NULL, we won't be able to return a dyad_ctx_t // to the user. 
In that case, print an error and return // immediately with DYAD_NOCTX. @@ -598,11 +537,9 @@ dyad_rc_t dyad_init (bool debug, } // Set the initial contents of the dyad_ctx_t object // to dyad_ctx_default. - printf ("Setting default values for ctx\n"); **ctx = dyad_ctx_default; // If neither managed path is provided, DYAD will not do anything. // So, simply print a warning and return DYAD_OK. - printf ("Checking prod path and cons path\n"); if (prod_managed_path == NULL && cons_managed_path == NULL) { fprintf (stderr, "Warning: no managed path provided! DYAD will not do " @@ -617,7 +554,6 @@ dyad_rc_t dyad_init (bool debug, (*ctx)->key_bins = key_bins; // Open a Flux handle and store it in the dyad_ctx_t // object. If the open operation failed, return DYAD_FLUXFAIL - printf ("Openning Flux\n"); (*ctx)->h = flux_open (NULL, 0); if ((*ctx)->h == NULL) { fprintf (stderr, "Could not open Flux handle!\n"); @@ -626,14 +562,12 @@ dyad_rc_t dyad_init (bool debug, // Get the rank of the Flux broker corresponding // to the handle. 
If this fails, return DYAD_FLUXFAIL FLUX_LOG_INFO ((*ctx)->h, "DYAD_CORE: getting Flux rank"); - printf ("Getting Flux rank\n"); if (flux_get_rank ((*ctx)->h, &((*ctx)->rank)) < 0) { FLUX_LOG_ERR ((*ctx)->h, "Could not get Flux rank!\n"); return DYAD_RC_FLUXFAIL; } // If the namespace is provided, copy it into the dyad_ctx_t object FLUX_LOG_INFO ((*ctx)->h, "DYAD_CORE: saving KVS namespace"); - printf ("Setting KVS namespace\n"); if (kvs_namespace == NULL) { FLUX_LOG_ERR ((*ctx)->h, "No KVS namespace provided!\n"); // TODO see if we want a different return val @@ -652,7 +586,6 @@ dyad_rc_t dyad_init (bool debug, // Initialize the DTL based on the value of dtl_mode // If an error occurs, log it and return an error FLUX_LOG_INFO ((*ctx)->h, "DYAD_CORE: inintializing DYAD DTL"); - printf ("Initializing DYAD DTL\n"); rc = dyad_dtl_init( dtl_mode, (*ctx)->h, @@ -668,7 +601,6 @@ dyad_rc_t dyad_init (bool debug, // If the producer-managed path is provided, copy it into // the dyad_ctx_t object FLUX_LOG_INFO ((*ctx)->h, "DYAD_CORE: saving producer path"); - printf ("Copying producer path\n"); if (prod_managed_path == NULL) { (*ctx)->prod_managed_path = NULL; } else { @@ -689,7 +621,6 @@ dyad_rc_t dyad_init (bool debug, // If the consumer-managed path is provided, copy it into // the dyad_ctx_t object FLUX_LOG_INFO ((*ctx)->h, "DYAD_CORE: saving consumer path"); - printf ("Copying consumer path\n"); if (cons_managed_path == NULL) { (*ctx)->cons_managed_path = NULL; } else { @@ -713,7 +644,6 @@ dyad_rc_t dyad_init (bool debug, (*ctx)->reenter = true; (*ctx)->initialized = true; // TODO Print logging info - printf ("Done with dyad_init\n"); return DYAD_RC_OK; } @@ -731,8 +661,6 @@ dyad_rc_t dyad_init_env (dyad_ctx_t **ctx) size_t dtl_mode_env_len = 0; dyad_dtl_mode_t dtl_mode = DYAD_DTL_FLUX_RPC; - printf("DYAD_CORE: Initializing with environment variables\n"); - if ((e = getenv (DYAD_SYNC_DEBUG_ENV))) { debug = true; enable_debug_dyad_utils (); @@ -741,6 +669,9 @@ dyad_rc_t 
dyad_init_env (dyad_ctx_t **ctx) disable_debug_dyad_utils (); } + if (debug) + fprintf (stderr, "DYAD_CORE: Initializing with environment variables\n"); + if ((e = getenv (DYAD_SYNC_CHECK_ENV))) { check = true; } else { @@ -790,14 +721,17 @@ dyad_rc_t dyad_init_env (dyad_ctx_t **ctx) } else if (strncmp (e, "UCX", dtl_mode_env_len) == 0) { dtl_mode = DYAD_DTL_UCX; } else { - printf ("Invalid DTL mode provided through %s. \ - Defaulting the FLUX_RPC\n", DYAD_DTL_MODE_ENV); + if (debug) { + fprintf (stderr, "Invalid DTL mode provided through %s. \ + Defaulting the FLUX_RPC\n", DYAD_DTL_MODE_ENV); + } dtl_mode = DYAD_DTL_FLUX_RPC; } } else { dtl_mode = DYAD_DTL_FLUX_RPC; } - printf ("DYAD_CORE: retrieved configuration from environment. Now initializing DYAD\n"); + if (debug) + fprintf (stderr, "DYAD_CORE: retrieved configuration from environment. Now initializing DYAD\n"); return dyad_init (debug, check, shared_storage, key_depth, key_bins, kvs_namespace, prod_managed_path, cons_managed_path, dtl_mode, ctx); @@ -823,28 +757,23 @@ dyad_rc_t dyad_produce (dyad_ctx_t* ctx, const char* fname) dyad_rc_t dyad_consume (dyad_ctx_t* ctx, const char* fname) { - printf ("Calling dyad_consume for %s (ctx is NULL: %d)\n", fname, (ctx == NULL)); int rc = 0; // If the context is not defined, then it is not valid. // So, return DYAD_NOCTX - printf ("Checking if ctx is NULL\n"); if (!ctx || !ctx->h) { return DYAD_RC_NOCTX; } // If the consumer-managed path is NULL or empty, then the context is not // valid for a consumer operation. 
So, return DYAD_BADMANAGEDPATH - printf ("Confirming that consumer path is set\n"); if (ctx->cons_managed_path == NULL || strlen (ctx->cons_managed_path) == 0) { return DYAD_RC_BADMANAGEDPATH; } // Set reenter to false to avoid recursively performing // DYAD operations - printf ("Setting reenter to false to avoid recursion\n"); ctx->reenter = false; // Call dyad_fetch to get (and possibly wait on) // data from the Flux KVS - printf ("Calling dyad_fetch\n"); dyad_kvs_response_t* resp = NULL; rc = dyad_fetch (ctx, fname, &resp); // If an error occured in dyad_fetch, log an error @@ -858,18 +787,15 @@ dyad_rc_t dyad_consume (dyad_ctx_t* ctx, const char* fname) // This will most likely happend because shared_storage // is enabled if (resp == NULL) { - printf ("KVS response is NULL, which means we should skip dyad_pull\n"); DYAD_LOG_INFO (ctx, "The KVS response is NULL! Skipping dyad_pull!\n"); rc = DYAD_RC_OK; goto consume_done; } // Call dyad_pull to fetch the data from the producer's // Flux broker - printf ("Calling dyad_pull\n"); rc = dyad_pull (ctx, resp); // Regardless if there was an error in dyad_pull, // free the KVS response object - printf ("Freeing KVS response\n"); if (resp != NULL) { free (resp->fpath); free (resp); @@ -890,37 +816,28 @@ consume_done:; int dyad_finalize (dyad_ctx_t** ctx) { - printf ("Finalizing DYAD\n"); if (ctx == NULL || *ctx == NULL) { return DYAD_RC_OK; } - printf("Calling dyad_dtl_finalize\n"); dyad_dtl_finalize (&(*ctx)->dtl_handle); if ((*ctx)->h != NULL) { - printf("Calling flux_close\n"); flux_close ((*ctx)->h); (*ctx)->h = NULL; } if ((*ctx)->kvs_namespace != NULL) { - printf("Freeing kvs_namespace\n"); free ((*ctx)->kvs_namespace); (*ctx)->kvs_namespace = NULL; } if ((*ctx)->prod_managed_path != NULL) { - printf("Freeing producer path\n"); free ((*ctx)->prod_managed_path); (*ctx)->prod_managed_path = NULL; } if ((*ctx)->cons_managed_path != NULL) { - printf("Freeing consumer path\n"); free ((*ctx)->cons_managed_path); 
(*ctx)->cons_managed_path = NULL; } - printf ("Freeing DYAD context\n"); free (*ctx); - printf ("Setting *ctx to NULL to avoid extra freeing\n"); *ctx = NULL; - printf ("All done with dyad_finalize\n"); return DYAD_RC_OK; } diff --git a/src/modules/dtl/ucx_mod_dtl.c b/src/modules/dtl/ucx_mod_dtl.c index f9f87d78..d8de3edd 100644 --- a/src/modules/dtl/ucx_mod_dtl.c +++ b/src/modules/dtl/ucx_mod_dtl.c @@ -64,7 +64,7 @@ int dyad_mod_ucx_dtl_init(flux_t *h, bool debug, dyad_mod_ucx_dtl_t **dtl_handle UCP_PARAM_FIELD_REQUEST_SIZE | UCP_PARAM_FIELD_REQUEST_INIT; ucp_params.features = UCP_FEATURE_TAG | - UCP_FEATURE_RMA | + // UCP_FEATURE_RMA | UCP_FEATURE_WAKEUP; ucp_params.request_size = sizeof(struct mod_request); ucp_params.request_init = dyad_mod_ucx_request_init; @@ -231,7 +231,7 @@ int dyad_mod_ucx_dtl_send(dyad_mod_ucx_dtl_t *dtl_handle, void *buf, size_t bufl FLUX_LOG_INFO (dtl_handle->h, "Processing UCP send request\n"); if (UCS_PTR_IS_ERR(stat_ptr)) { - FLUX_LOG_INFO (dtl_handle->h, "Error occured in UCP send\n"); + FLUX_LOG_ERR (dtl_handle->h, "Error occured in UCP send\n"); status = UCS_PTR_STATUS(stat_ptr); } else if (UCS_PTR_IS_PTR(stat_ptr)) diff --git a/src/modules/dyad.c b/src/modules/dyad.c index c8d243c5..dbf1457d 100644 --- a/src/modules/dyad.c +++ b/src/modules/dyad.c @@ -229,11 +229,16 @@ static const struct flux_msg_handler_spec htab[] = {{FLUX_MSGTYPE_REQUEST, void usage() { - fprintf(stderr, "Usage: flux exec -r all flux module load dyad.so \n\n"); - fprintf(stderr, "DTL_MODE Values:\n"); - fprintf(stderr, "================\n"); - fprintf(stderr, " * FLUX_RPC: use Flux's RPC response mechanism to send data to consumer\n"); - fprintf(stderr, " * UCX: use UCX to send data to consumer\n"); + fprintf(stderr, "Usage: flux exec -r all flux module load dyad.so [DTL_MODE] [--debug | -d]\n\n"); + fprintf(stderr, "Required Arguments:\n"); + fprintf(stderr, "===================\n"); + fprintf(stderr, " * PRODUCER_PATH: the producer-managed path that the 
module should track\n\n"); + fprintf(stderr, "Optional Arguments:\n"); + fprintf(stderr, "===================\n"); + fprintf(stderr, " * DTL_MODE: a valid data transport layer (DTL) mode. Can be one of the following values\n"); + fprintf(stderr, " * UCX (default): use UCX to send data to consumer\n"); + fprintf(stderr, " * FLUX_RPC: use Flux's RPC response mechanism to send data to consumer\n"); + fprintf(stderr, " * --debug | -d: if provided, add debugging log messages\n"); } int mod_main (flux_t *h, int argc, char **argv) @@ -251,8 +256,6 @@ int mod_main (flux_t *h, int argc, char **argv) ctx = getctx (h); - FLUX_LOG_INFO (h, "Received %d cmd line args\n", argc); - if (argc < 1) { FLUX_LOG_ERR (ctx->h, "DYAD_MOD: Missing argument(s). " @@ -291,7 +294,6 @@ int mod_main (flux_t *h, int argc, char **argv) goto mod_error; } - fprintf (stderr, "dyad module begins using \"%s\"\n", argv[0]); FLUX_LOG_INFO (ctx->h, "dyad module begins using \"%s\"\n", argv[0]); if (flux_msg_handler_addvec (ctx->h, htab, (void *)h, &ctx->handlers) < 0) { diff --git a/src/utils/utils.c b/src/utils/utils.c index 744e52f7..d8fd80aa 100644 --- a/src/utils/utils.c +++ b/src/utils/utils.c @@ -230,7 +230,6 @@ bool cmp_canonical_path_prefix (const char* __restrict__ prefix, char* __restrict__ upath, const size_t upath_capacity) { - printf ("Checking if there is overlap between prefix and upath\n"); { const char* const upath_end = upath + upath_capacity; bool no_overlap = @@ -244,12 +243,10 @@ bool cmp_canonical_path_prefix (const char* __restrict__ prefix, } // Only works when there are no multiple absolute paths via hardlinks - printf ("Create tmp strings on stack\n"); char can_prefix[PATH_MAX] = {'\0'}; // canonical form of the managed path char can_path[PATH_MAX] = {'\0'}; // canonical form of the given path // The path prefix needs to translate to a real path - printf ("Calling realpath for prefix\n"); if (!realpath (prefix, can_prefix)) { IPRINTF ("DYAD UTIL: error in realpath for %s\n", 
prefix); IPRINTF ("DYAD UTIL: %s\n", strerror (errno)); @@ -261,32 +258,25 @@ bool cmp_canonical_path_prefix (const char* __restrict__ prefix, // See if the prefix of the path in question matches that of either the // dyad managed path or its canonical form when the path is not a real one. - printf ("Calling realpath for path\n"); if (!realpath (path, can_path)) { IPRINTF ("DYAD UTIL: %s is NOT a realpath.\n", path); - printf ("Since path isn't a realpath, calling cmp_prefix\n"); if (!cmp_prefix (prefix, path, DYAD_PATH_DELIM, &upath_len)) { match = cmp_prefix (can_prefix, path, DYAD_PATH_DELIM, &upath_len); } else { match = true; } - printf ("Calling extract_user_path\n"); extract_user_path (path, upath, upath_len); return match; } // For a real path, see if the prefix of either the path or its canonical // form matches the managed path. - printf ("Calling cmp_prefix on can_prefix and can_path\n"); match = cmp_prefix (can_prefix, can_path, DYAD_PATH_DELIM, &upath_len); if (upath_len + 1 > upath_capacity) { - printf ("upath_len too big for upath_capacity\n"); return false; } - printf ("Calling extract_user_path\n"); extract_user_path (can_path, upath, upath_len); - printf ("Finished with cmp_canonical_path_prefix\n"); return match; } diff --git a/src/wrapper/wrapper.c b/src/wrapper/wrapper.c index e41bd66d..79c48e37 100644 --- a/src/wrapper/wrapper.c +++ b/src/wrapper/wrapper.c @@ -47,7 +47,7 @@ extern "C" { #endif static __thread dyad_ctx_t *ctx = NULL; -// static void dyad_wrapper_init (void) __attribute__((constructor)); +static void dyad_wrapper_init (void) __attribute__((constructor)); static void dyad_wrapper_fini (void) __attribute__ ((destructor)); #if DYAD_SYNC_DIR @@ -92,10 +92,8 @@ void dyad_wrapper_init (void) if (DYAD_IS_ERROR (rc)) { fprintf(stderr, "Failed to initialize DYAD (code = %d)\n", rc); - if (ctx != NULL) { - dyad_wrapper_fini(); - } - ctx = NULL; + ctx->initialized = false; + ctx->reenter = false; return; } @@ -123,9 +121,9 @@ int open 
(const char *path, int oflag, ...) open_ptr_t func_ptr = NULL; int mode = 0; - if (ctx == NULL) { - dyad_wrapper_init (); - } + // if (ctx == NULL) { + // dyad_wrapper_init (); + // } if (oflag & O_CREAT) { va_list arg; @@ -169,9 +167,9 @@ FILE *fopen (const char *path, const char *mode) typedef FILE *(*fopen_ptr_t) (const char *, const char *); fopen_ptr_t func_ptr = NULL; - if (ctx == NULL) { - dyad_wrapper_init (); - } + // if (ctx == NULL) { + // dyad_wrapper_init (); + // } func_ptr = (fopen_ptr_t)dlsym (RTLD_NEXT, "fopen"); if ((error = dlerror ())) { @@ -210,9 +208,9 @@ int close (int fd) char path[PATH_MAX + 1] = {'\0'}; int rc = 0; - if (ctx == NULL) { - dyad_wrapper_init (); - } + // if (ctx == NULL) { + // dyad_wrapper_init (); + // } func_ptr = (close_ptr_t)dlsym (RTLD_NEXT, "close"); if ((error = dlerror ())) { @@ -300,9 +298,9 @@ int fclose (FILE *fp) int rc = 0; int fd = 0; - if (ctx == NULL) { - dyad_wrapper_init (); - } + // if (ctx == NULL) { + // dyad_wrapper_init (); + // } func_ptr = (fclose_ptr_t)dlsym (RTLD_NEXT, "fclose"); if ((error = dlerror ())) { From 252f96cb3ea0f7fdfcd891491474e07a49e4c865 Mon Sep 17 00:00:00 2001 From: ilumsden Date: Thu, 18 May 2023 16:13:04 -0400 Subject: [PATCH 03/18] Changes default DTL mode to UCX --- src/core/dtl/ucx_dtl.c | 1 + src/core/dyad_core.c | 2 +- src/modules/dtl/ucx_mod_dtl.c | 3 ++- src/modules/dyad.c | 3 ++- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/core/dtl/ucx_dtl.c b/src/core/dtl/ucx_dtl.c index 65580a4e..2dbbdff7 100644 --- a/src/core/dtl/ucx_dtl.c +++ b/src/core/dtl/ucx_dtl.c @@ -375,6 +375,7 @@ dyad_rc_t dyad_dtl_ucx_recv(dyad_dtl_ucx_t *dtl_handle, // The metadata retrived from the probed tag recv event contains // the size of the data to be sent. 
// So, use that size to allocate a buffer + FLUX_LOG_INFO (dtl_handle->h, "Got message with tag %lu and size %lu\n", msg_info.sender_tag, msg_info.length); *buflen = msg_info.length; *buf = malloc(*buflen); // If allocation fails, log an error diff --git a/src/core/dyad_core.c b/src/core/dyad_core.c index b7fed5b3..31daf3d8 100644 --- a/src/core/dyad_core.c +++ b/src/core/dyad_core.c @@ -659,7 +659,7 @@ dyad_rc_t dyad_init_env (dyad_ctx_t **ctx) char *prod_managed_path = NULL; char *cons_managed_path = NULL; size_t dtl_mode_env_len = 0; - dyad_dtl_mode_t dtl_mode = DYAD_DTL_FLUX_RPC; + dyad_dtl_mode_t dtl_mode = DYAD_DTL_UCX; if ((e = getenv (DYAD_SYNC_DEBUG_ENV))) { debug = true; diff --git a/src/modules/dtl/ucx_mod_dtl.c b/src/modules/dtl/ucx_mod_dtl.c index d8de3edd..ce8f28fa 100644 --- a/src/modules/dtl/ucx_mod_dtl.c +++ b/src/modules/dtl/ucx_mod_dtl.c @@ -182,6 +182,7 @@ int dyad_mod_ucx_dtl_establish_connection(dyad_mod_ucx_dtl_t *dtl_handle) ); if (UCX_CHECK(status)) { + FLUX_LOG_ERR (dtl_handle->h, "ucp_ep_create failed with status %d\n", (int) status); return -1; } if (dtl_handle->debug) @@ -218,7 +219,7 @@ int dyad_mod_ucx_dtl_send(dyad_mod_ucx_dtl_t *dtl_handle, void *buf, size_t bufl ¶ms ); #else - FLUX_LOG_INFO (dtl_handle->h, "Sending data to consumer with ucp_tag_send_nb\n"); + FLUX_LOG_INFO (dtl_handle->h, "Sending %lu bytes of data to consumer with ucp_tag_send_nb\n", buflen); stat_ptr = ucp_tag_send_nb( dtl_handle->curr_ep, buf, diff --git a/src/modules/dyad.c b/src/modules/dyad.c index dbf1457d..2c470190 100644 --- a/src/modules/dyad.c +++ b/src/modules/dyad.c @@ -170,6 +170,7 @@ static void dyad_fetch_request_cb (flux_t *h, goto fetch_error; } close (fd); + FLUX_LOG_INFO (h, "Is inbuf NULL? 
-> %i\n", (int) (inbuf == NULL)); FLUX_LOG_INFO (h, "Establish DTL connection with consumer"); if (dyad_mod_dtl_establish_connection (ctx->dtl_handle) < 0) { @@ -246,7 +247,7 @@ int mod_main (flux_t *h, int argc, char **argv) const mode_t m = (S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH | S_ISGID); dyad_mod_ctx_t *ctx = NULL; size_t flag_len = 0; - dyad_mod_dtl_mode_t dtl_mode = DYAD_DTL_FLUX_RPC; + dyad_mod_dtl_mode_t dtl_mode = DYAD_DTL_UCX; bool debug = false; if (!h) { From 0f4a10c27ff6455f0ce2ac0fd271fcb2964a83dc Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Fri, 7 Jul 2023 12:18:58 -0700 Subject: [PATCH 04/18] Fixes a bug that caused a mismatch in the default DTL used by clients and the module --- src/core/dyad_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/dyad_core.c b/src/core/dyad_core.c index 31daf3d8..e64d10d6 100644 --- a/src/core/dyad_core.c +++ b/src/core/dyad_core.c @@ -723,12 +723,12 @@ dyad_rc_t dyad_init_env (dyad_ctx_t **ctx) } else { if (debug) { fprintf (stderr, "Invalid DTL mode provided through %s. \ - Defaulting the FLUX_RPC\n", DYAD_DTL_MODE_ENV); + Defaulting to UCX\n", DYAD_DTL_MODE_ENV); } - dtl_mode = DYAD_DTL_FLUX_RPC; + dtl_mode = DYAD_DTL_UCX; } } else { - dtl_mode = DYAD_DTL_FLUX_RPC; + dtl_mode = DYAD_DTL_UCX; } if (debug) fprintf (stderr, "DYAD_CORE: retrieved configuration from environment. 
Now initializing DYAD\n"); From 190ac3753b81b21412971861c14cd175e085fe95 Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Fri, 14 Jul 2023 13:50:31 -0700 Subject: [PATCH 05/18] Implements a unified DTL to replace the sub-DTLs --- configure.ac | 3 +- src/Makefile.am | 2 +- src/core/Makefile.am | 28 +-- src/core/dtl/dyad_dtl.c | 158 ------------ src/core/dtl/dyad_dtl.h | 36 --- src/core/dtl/flux_dtl.c | 87 ------- src/core/dtl/flux_dtl.h | 41 --- src/core/dtl/ucx_dtl.h | 47 ---- src/core/dyad_core.c | 34 +-- src/core/dyad_core.h | 46 ++-- src/core/dyad_core.sym | 6 - src/core/dyad_dtl_defs.h | 24 -- src/dtl/Makefile.am | 25 ++ src/dtl/dyad_dtl.h | 20 ++ src/dtl/dyad_dtl_impl.c | 82 ++++++ src/dtl/dyad_dtl_impl.h | 64 +++++ src/{core => dtl}/dyad_flux_log.h | 0 src/{core => dtl}/dyad_rc.h | 6 + src/dtl/flux_dtl.c | 144 +++++++++++ src/dtl/flux_dtl.h | 41 +++ src/{core => }/dtl/ucx_dtl.c | 400 +++++++++++++++++++++++------- src/dtl/ucx_dtl.h | 48 ++++ src/modules/Makefile.am | 30 +-- src/modules/dtl/dyad_mod_dtl.c | 169 ------------- src/modules/dtl/dyad_mod_dtl.h | 42 ---- src/modules/dtl/flux_mod_dtl.c | 82 ------ src/modules/dtl/flux_mod_dtl.h | 38 --- src/modules/dtl/ucx_mod_dtl.c | 348 -------------------------- src/modules/dtl/ucx_mod_dtl.h | 43 ---- src/modules/dyad.c | 57 +++-- src/stream/Makefile.am | 17 +- src/urpc/Makefile.am | 6 +- src/utils/Makefile.am | 14 +- src/utils/base64/Makefile.am | 3 + src/wrapper/Makefile.am | 14 +- src/wrapper/wrapper.c | 10 +- src/wrapper/wrapper.sym | 4 - 37 files changed, 893 insertions(+), 1326 deletions(-) delete mode 100644 src/core/dtl/dyad_dtl.c delete mode 100644 src/core/dtl/dyad_dtl.h delete mode 100644 src/core/dtl/flux_dtl.c delete mode 100644 src/core/dtl/flux_dtl.h delete mode 100644 src/core/dtl/ucx_dtl.h delete mode 100644 src/core/dyad_core.sym delete mode 100644 src/core/dyad_dtl_defs.h create mode 100644 src/dtl/Makefile.am create mode 100644 src/dtl/dyad_dtl.h create mode 100644 src/dtl/dyad_dtl_impl.c 
create mode 100644 src/dtl/dyad_dtl_impl.h rename src/{core => dtl}/dyad_flux_log.h (100%) rename src/{core => dtl}/dyad_rc.h (92%) create mode 100644 src/dtl/flux_dtl.c create mode 100644 src/dtl/flux_dtl.h rename src/{core => }/dtl/ucx_dtl.c (54%) create mode 100644 src/dtl/ucx_dtl.h delete mode 100644 src/modules/dtl/dyad_mod_dtl.c delete mode 100644 src/modules/dtl/dyad_mod_dtl.h delete mode 100644 src/modules/dtl/flux_mod_dtl.c delete mode 100644 src/modules/dtl/flux_mod_dtl.h delete mode 100644 src/modules/dtl/ucx_mod_dtl.c delete mode 100644 src/modules/dtl/ucx_mod_dtl.h delete mode 100644 src/wrapper/wrapper.sym diff --git a/configure.ac b/configure.ac index ff988bc7..5e0e5bf7 100644 --- a/configure.ac +++ b/configure.ac @@ -185,7 +185,7 @@ AC_CHECK_FUNCS( \ # Add any necessary compilation flags # ####################################### if test "x$enable_debug" = xyes; then - CPPFLAGS="$CPPFLAGS -DDYAD_FULL_DEBUG=1 -DDYAD_LOGGING_ON=1" + CFLAGS="$CFLAGS -DDYAD_FULL_DEBUG=1 -DDYAD_LOGGING_ON=1" fi ######################## @@ -196,6 +196,7 @@ AC_CONFIG_FILES([Makefile src/utils/Makefile src/utils/base64/Makefile src/utils/libtap/Makefile + src/dtl/Makefile src/core/Makefile src/stream/Makefile src/modules/Makefile diff --git a/src/Makefile.am b/src/Makefile.am index 06bca034..626467d4 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1 +1 @@ -SUBDIRS = utils modules core wrapper stream +SUBDIRS = utils dtl modules core wrapper stream diff --git a/src/core/Makefile.am b/src/core/Makefile.am index ceabfe91..e34a0b29 100644 --- a/src/core/Makefile.am +++ b/src/core/Makefile.am @@ -1,29 +1,29 @@ lib_LTLIBRARIES = libdyad_core.la libdyad_core_la_SOURCES = \ - dtl/dyad_dtl.c \ - dtl/dyad_dtl.h \ - dtl/flux_dtl.c \ - dtl/flux_dtl.h \ - dtl/ucx_dtl.c \ - dtl/ucx_dtl.h \ dyad_core.c libdyad_core_la_LIBADD = \ - $(top_builddir)/src/utils/libutils.la \ - $(top_builddir)/src/utils/libmurmur3.la \ + $(top_builddir)/src/dtl/libdyad_dtl.la \ $(UCX_LIBS) \ 
$(JANSSON_LIBS) \ $(FLUX_CORE_LIBS) -libdyad_core_la_CPPFLAGS = \ - $(AM_CPPFLAGS) \ +libdyad_core_la_CFLAGS = \ + $(AM_CFLAGS) \ -I$(top_srcdir)/src/utils \ -I$(top_srcdir)/src/utils/base64 \ + -I$(top_srcdir)/src/dtl \ $(UCX_CFLAGS) \ $(JANSSON_CFLAGS) \ - $(FLUX_CORE_CFLAGS) -libdyad_core_la_LDFLAGS = -export-symbols dyad_core.sym -Wl,-rpath,'$(UCX_LIBDIR)' $(AM_LDFLAGS) + $(FLUX_CORE_CFLAGS) \ + -DBUILDING_DYAD=1 \ + -fvisibility=hidden +libdyad_core_la_CPPFLAGS = +libdyad_core_la_LDFLAGS = \ + -Wl,-rpath,'$(UCX_LIBDIR)' \ + $(AM_LDFLAGS) if PERFFLOW libdyad_core_la_LIBADD += $(PERFFLOW_LIBS) -libdyad_core_la_CPPFLAGS += $(PERFFLOW_PLUGIN_CPPFLAGS) $(PERFFLOW_CFLAGS) -DDYAD_PERFFLOW=1 +libdyad_core_la_CFLAGS += $(PERFFLOW_CFLAGS) -DDYAD_PERFFLOW=1 +libdyad_core_la_CPPFLAGS += $(PERFFLOW_PLUGIN_CPPFLAGS) endif -include_HEADERS = dyad_core.h dyad_envs.h dyad_rc.h dyad_flux_log.h dyad_dtl_defs.h +include_HEADERS = dyad_core.h dyad_envs.h diff --git a/src/core/dtl/dyad_dtl.c b/src/core/dtl/dyad_dtl.c deleted file mode 100644 index e9d110bc..00000000 --- a/src/core/dtl/dyad_dtl.c +++ /dev/null @@ -1,158 +0,0 @@ -#include "dyad_dtl.h" - -#include "dyad_dtl_defs.h" -#include "ucx_dtl.h" -#include "flux_dtl.h" - -// Actual definition of dyad_dtl_t -struct dyad_dtl { - flux_t* h; - dyad_dtl_mode_t mode; - void *real_dtl_handle; -}; - -dyad_rc_t dyad_dtl_init(dyad_dtl_mode_t mode, flux_t *h, - const char *kvs_namespace, bool debug, - dyad_dtl_t **dtl_handle) -{ - *dtl_handle = malloc(sizeof(struct dyad_dtl)); - if (*dtl_handle == NULL) - { - FLUX_LOG_ERR (h, "Could not allocate a dyad_dtl_t object\n"); - return DYAD_RC_SYSFAIL; - } - (*dtl_handle)->mode = mode; - (*dtl_handle)->h = h; - if (mode == DYAD_DTL_UCX) { - FLUX_LOG_INFO (h, "Initializing UCX DTL\n"); - return dyad_dtl_ucx_init (h, kvs_namespace, debug, - (dyad_dtl_ucx_t**)&((*dtl_handle)->real_dtl_handle)); - } - if (mode == DYAD_DTL_FLUX_RPC) - { - FLUX_LOG_INFO (h, "Initializing Flux RPC DTL\n"); - return 
dyad_dtl_flux_init (h, kvs_namespace, debug, - (dyad_dtl_flux_t**)&((*dtl_handle)->real_dtl_handle)); - } - FLUX_LOG_ERR (h, "Invalid DYAD DTL Mode: %d\n", (int) mode); - return DYAD_RC_BADDTLMODE; -} - -dyad_rc_t dyad_dtl_rpc_pack(dyad_dtl_t *dtl_handle, const char *upath, uint32_t producer_rank, - json_t **packed_obj) -{ - if (dtl_handle->mode == DYAD_DTL_UCX) { - return dyad_dtl_ucx_rpc_pack( - (dyad_dtl_ucx_t*) dtl_handle->real_dtl_handle, - upath, - producer_rank, - packed_obj - ); - } - if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) { - return dyad_dtl_flux_rpc_pack( - (dyad_dtl_flux_t*) dtl_handle->real_dtl_handle, - upath, - producer_rank, - packed_obj - ); - } - FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL Mode: %d\n", (int) dtl_handle->mode); - return DYAD_RC_BADDTLMODE; -} - -dyad_rc_t dyad_dtl_recv_rpc_response(dyad_dtl_t* dtl_handle, flux_future_t *f) -{ - if (dtl_handle->mode == DYAD_DTL_UCX) - { - return dyad_dtl_ucx_recv_rpc_response( - (dyad_dtl_ucx_t*) dtl_handle->real_dtl_handle, - f - ); - } - if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) - { - return dyad_dtl_flux_recv_rpc_response( - (dyad_dtl_flux_t*) dtl_handle->real_dtl_handle, - f - ); - } - FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL Mode: %d\n", (int) dtl_handle->mode); - return DYAD_RC_BADDTLMODE; -} - -dyad_rc_t dyad_dtl_establish_connection(dyad_dtl_t *dtl_handle) { - if (dtl_handle->mode == DYAD_DTL_UCX) { - return dyad_dtl_ucx_establish_connection( - (dyad_dtl_ucx_t*) dtl_handle->real_dtl_handle - ); - } - if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) { - return dyad_dtl_flux_establish_connection( - (dyad_dtl_flux_t*) dtl_handle->real_dtl_handle - ); - } - FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL Mode: %d\n", (int) dtl_handle->mode); - return DYAD_RC_BADDTLMODE; -} - -dyad_rc_t dyad_dtl_recv(dyad_dtl_t *dtl_handle, - void **buf, size_t *buflen) -{ - if (dtl_handle->mode == DYAD_DTL_UCX) { - return dyad_dtl_ucx_recv( - (dyad_dtl_ucx_t*) dtl_handle->real_dtl_handle, - buf, - buflen - 
); - } - if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) { - return dyad_dtl_flux_recv( - (dyad_dtl_flux_t*) dtl_handle->real_dtl_handle, - buf, - buflen - ); - } - FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL Mode: %d\n", (int) dtl_handle->mode); - return DYAD_RC_BADDTLMODE; -} - -dyad_rc_t dyad_dtl_close_connection(dyad_dtl_t *dtl_handle) -{ - if (dtl_handle->mode == DYAD_DTL_UCX) { - return dyad_dtl_ucx_close_connection( - (dyad_dtl_ucx_t*) dtl_handle->real_dtl_handle - ); - } - if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) { - return dyad_dtl_flux_close_connection( - (dyad_dtl_flux_t*) dtl_handle->real_dtl_handle - ); - } - FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL Mode: %d\n", (int) dtl_handle->mode); - return DYAD_RC_BADDTLMODE; -} - -dyad_rc_t dyad_dtl_finalize(dyad_dtl_t **dtl_handle) -{ - if (dtl_handle == NULL || *dtl_handle == NULL) { - return DYAD_RC_OK; - } - flux_t* h = (*dtl_handle)->h; - dyad_dtl_mode_t mode = (*dtl_handle)->mode; - void *real_dtl_handle = (*dtl_handle)->real_dtl_handle; - free(*dtl_handle); - *dtl_handle = NULL; - if (mode == DYAD_DTL_UCX) { - return dyad_dtl_ucx_finalize( - (dyad_dtl_ucx_t*) real_dtl_handle - ); - } - if (mode == DYAD_DTL_FLUX_RPC) { - return dyad_dtl_flux_finalize( - (dyad_dtl_flux_t*) real_dtl_handle - ); - } - FLUX_LOG_ERR (h, "Invalid DYAD DTL Mode: %d\n", (int) mode); - return DYAD_RC_BADDTLMODE; -} diff --git a/src/core/dtl/dyad_dtl.h b/src/core/dtl/dyad_dtl.h deleted file mode 100644 index eed5cd73..00000000 --- a/src/core/dtl/dyad_dtl.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef __DYAD_DTL_H__ -#define __DYAD_DTL_H__ - -#include "dyad_dtl_defs.h" -#include "dyad_rc.h" - -#include -#include - -#ifdef __cplusplus -#include -#else -#include -#include -#endif - -dyad_rc_t dyad_dtl_init(dyad_dtl_mode_t mode, flux_t *h, - const char *kvs_namespace, bool debug, - dyad_dtl_t **dtl_handle); - -dyad_rc_t dyad_dtl_rpc_pack(dyad_dtl_t *dtl_handle, - const char *upath, uint32_t producer_rank, json_t **packed_obj); - 
-dyad_rc_t dyad_dtl_recv_rpc_response(dyad_dtl_t *dtl_handle, - flux_future_t *f); - -dyad_rc_t dyad_dtl_establish_connection(dyad_dtl_t *dtl_handle); - -dyad_rc_t dyad_dtl_recv(dyad_dtl_t *dtl_handle, - void **buf, size_t *buflen); - -dyad_rc_t dyad_dtl_close_connection(dyad_dtl_t *dtl_handle); - -dyad_rc_t dyad_dtl_finalize(dyad_dtl_t **dtl_handle); - -#endif /* __DYAD_DTL_H__ */ diff --git a/src/core/dtl/flux_dtl.c b/src/core/dtl/flux_dtl.c deleted file mode 100644 index 6fc827d1..00000000 --- a/src/core/dtl/flux_dtl.c +++ /dev/null @@ -1,87 +0,0 @@ -#include "flux_dtl.h" -#include "dyad_rc.h" - -dyad_rc_t dyad_dtl_flux_init(flux_t *h, const char *kvs_namespace, - bool debug, dyad_dtl_flux_t **dtl_handle) -{ - *dtl_handle = malloc(sizeof(struct dyad_dtl_flux)); - if (*dtl_handle == NULL) - { - FLUX_LOG_ERR (h, "Cannot allocate the DTL handle for Flux\n"); - return DYAD_RC_SYSFAIL; - } - (*dtl_handle)->h = h; - (*dtl_handle)->kvs_namespace = kvs_namespace; - (*dtl_handle)->f = NULL; - return DYAD_RC_OK; -} - -dyad_rc_t dyad_dtl_flux_rpc_pack(dyad_dtl_flux_t *dtl_handle, const char *upath, - uint32_t producer_rank, json_t **packed_obj) -{ - *packed_obj = json_pack( - "{s:s}", - "upath", - upath - ); - if (*packed_obj == NULL) - { - FLUX_LOG_ERR (dtl_handle->h, "Could not pack upath for Flux DTL\n"); - return DYAD_RC_BADPACK; - } - return DYAD_RC_OK; -} - -dyad_rc_t dyad_dtl_flux_recv_rpc_response(dyad_dtl_flux_t *dtl_handle, - flux_future_t *f) -{ - dtl_handle->f = f; - return DYAD_RC_OK; -} - -dyad_rc_t dyad_dtl_flux_establish_connection(dyad_dtl_flux_t *dtl_handle) -{ - return DYAD_RC_OK; -} - -dyad_rc_t dyad_dtl_flux_recv(dyad_dtl_flux_t *dtl_handle, - void **buf, size_t *buflen) -{ - int rc = 0; - void* tmp_buf = NULL; - size_t tmp_buflen = 0; - errno = 0; - FLUX_LOG_INFO (dtl_handle->h, "Get file contents from module using RPC\n"); - rc = flux_rpc_get_raw(dtl_handle->f, (const void**) &tmp_buf, (int*) &tmp_buflen); - if (rc < 0) - { - FLUX_LOG_ERR 
(dtl_handle->h, "Could not get file data from Flux RPC\n"); - if (errno == ENODATA) - return DYAD_RC_RPC_FINISHED; - return DYAD_RC_BADRPC; - } - *buflen = tmp_buflen; - *buf = malloc(*buflen); - memcpy(*buf, tmp_buf, *buflen); - flux_future_reset (dtl_handle->f); - return DYAD_RC_OK; -} - -dyad_rc_t dyad_dtl_flux_close_connection(dyad_dtl_flux_t *dtl_handle) -{ - dtl_handle->f = NULL; - return DYAD_RC_OK; -} - -dyad_rc_t dyad_dtl_flux_finalize(dyad_dtl_flux_t *dtl_handle) -{ - if (dtl_handle != NULL) - { - dtl_handle->h = NULL; - dtl_handle->kvs_namespace = NULL; - dtl_handle->f = NULL; - free(dtl_handle); - dtl_handle = NULL; - } - return DYAD_RC_OK; -} diff --git a/src/core/dtl/flux_dtl.h b/src/core/dtl/flux_dtl.h deleted file mode 100644 index a7e66609..00000000 --- a/src/core/dtl/flux_dtl.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef __FLUX_DTL_H__ -#define __FLUX_DTL_H__ - -#include "dyad_flux_log.h" -#include "dyad_rc.h" - -#include - -#ifdef __cplusplus -#include -#else -#include -#endif - -struct dyad_dtl_flux { - flux_t *h; - const char *kvs_namespace; - flux_future_t *f; -}; - -typedef struct dyad_dtl_flux dyad_dtl_flux_t; - -dyad_rc_t dyad_dtl_flux_init(flux_t *h, const char *kvs_namespace, - bool debug, dyad_dtl_flux_t **dtl_handle); - -dyad_rc_t dyad_dtl_flux_rpc_pack(dyad_dtl_flux_t *dtl_handle, - const char *upath, uint32_t producer_rank, json_t **packed_obj); - -dyad_rc_t dyad_dtl_flux_recv_rpc_response(dyad_dtl_flux_t *dtl_handle, - flux_future_t *f); - -dyad_rc_t dyad_dtl_flux_establish_connection(dyad_dtl_flux_t *dtl_handle); - -dyad_rc_t dyad_dtl_flux_recv(dyad_dtl_flux_t *dtl_handle, - void **buf, size_t *buflen); - -dyad_rc_t dyad_dtl_flux_close_connection(dyad_dtl_flux_t *dtl_handle); - -dyad_rc_t dyad_dtl_flux_finalize(dyad_dtl_flux_t *dtl_handle); - -#endif /* __FLUX_DTL_H__ */ diff --git a/src/core/dtl/ucx_dtl.h b/src/core/dtl/ucx_dtl.h deleted file mode 100644 index 27b504c9..00000000 --- a/src/core/dtl/ucx_dtl.h +++ /dev/null @@ -1,47 +0,0 
@@ -#ifndef __UCX_DTL_H__ -#define __UCX_DTL_H__ - -#include "dyad_flux_log.h" -#include "dyad_rc.h" - -#include -#include - -#ifdef __cplusplus -#include -#else -#include -#endif - -struct dyad_dtl_ucx { - flux_t *h; - const char *kvs_namespace; - ucp_context_h ucx_ctx; - ucp_worker_h ucx_worker; - ucp_address_t *consumer_address; - size_t addr_len; - ucp_tag_t comm_tag; -}; - -typedef struct dyad_dtl_ucx dyad_dtl_ucx_t; - -dyad_rc_t dyad_dtl_ucx_init(flux_t *h, const char *kvs_namespace, - bool debug, dyad_dtl_ucx_t **dtl_handle); - -dyad_rc_t dyad_dtl_ucx_rpc_pack(dyad_dtl_ucx_t *dtl_handle, - const char *upath, uint32_t producer_rank, json_t **packed_obj); - -dyad_rc_t dyad_dtl_ucx_recv_rpc_response(dyad_dtl_ucx_t *dtl_handle, - flux_future_t *f); - -dyad_rc_t dyad_dtl_ucx_establish_connection(dyad_dtl_ucx_t *dtl_handle); - -dyad_rc_t dyad_dtl_ucx_recv(dyad_dtl_ucx_t *dtl_handle, - void **buf, size_t *buflen); - -dyad_rc_t dyad_dtl_ucx_close_connection( - dyad_dtl_ucx_t *dtl_handle); - -dyad_rc_t dyad_dtl_ucx_finalize(dyad_dtl_ucx_t *dtl_handle); - -#endif /* __UCX_DTL_H__ */ diff --git a/src/core/dyad_core.c b/src/core/dyad_core.c index e64d10d6..f225a0c0 100644 --- a/src/core/dyad_core.c +++ b/src/core/dyad_core.c @@ -1,13 +1,10 @@ -#include -#include - #include "dyad_core.h" -#include "dyad_dtl_defs.h" -#include "dyad_flux_log.h" -#include "dyad_rc.h" -#include "dtl/dyad_dtl.h" #include "murmur3.h" #include "utils.h" +#include "dyad_dtl_impl.h" + +#include +#include #ifdef __cplusplus #include @@ -337,7 +334,7 @@ static inline dyad_rc_t dyad_get_data ( flux_future_t *f; json_t* rpc_payload; DYAD_LOG_INFO (ctx, "Packing payload for RPC to DYAD module"); - rc = dyad_dtl_rpc_pack ( + rc = ctx->dtl_handle->rpc_pack ( ctx->dtl_handle, kvs_data->fpath, kvs_data->owner_rank, @@ -364,28 +361,34 @@ static inline dyad_rc_t dyad_get_data ( goto get_done; } DYAD_LOG_INFO (ctx, "Receive RPC response from DYAD module"); - rc = dyad_dtl_recv_rpc_response(ctx->dtl_handle, 
f); + rc = ctx->dtl_handle->rpc_recv_response ( + ctx->dtl_handle, + f + ); if (DYAD_IS_ERROR(rc)) { DYAD_LOG_ERR(ctx, "Cannot receive and/or parse the RPC response\n"); goto get_done; } DYAD_LOG_INFO (ctx, "Establish DTL connection with DYAD module"); - rc = dyad_dtl_establish_connection ( - ctx->dtl_handle + rc = ctx->dtl_handle->establish_connection ( + ctx->dtl_handle, + DYAD_COMM_RECV ); if (DYAD_IS_ERROR(rc)) { DYAD_LOG_ERR (ctx, "Cannot establish connection with DYAD module on broker %u\n", kvs_data->owner_rank); goto get_done; } DYAD_LOG_INFO (ctx, "Receive file data via DTL"); - rc = dyad_dtl_recv ( + rc = ctx->dtl_handle->recv ( ctx->dtl_handle, (void**) file_data, file_len ); DYAD_LOG_INFO (ctx, "Close DTL connection with DYAD module"); - dyad_dtl_close_connection (ctx->dtl_handle); + ctx->dtl_handle->close_connection ( + ctx->dtl_handle + ); if (DYAD_IS_ERROR(rc)) { DYAD_LOG_ERR (ctx, "Cannot receive data from producer module\n"); @@ -587,11 +590,10 @@ dyad_rc_t dyad_init (bool debug, // If an error occurs, log it and return an error FLUX_LOG_INFO ((*ctx)->h, "DYAD_CORE: inintializing DYAD DTL"); rc = dyad_dtl_init( + &(*ctx)->dtl_handle, dtl_mode, (*ctx)->h, - (*ctx)->kvs_namespace, - (*ctx)->debug, - &(*ctx)->dtl_handle + (*ctx)->debug ); if (DYAD_IS_ERROR(rc)) { diff --git a/src/core/dyad_core.h b/src/core/dyad_core.h index b705f470..bc75ce08 100644 --- a/src/core/dyad_core.h +++ b/src/core/dyad_core.h @@ -1,11 +1,11 @@ #ifndef DYAD_CORE_DYAD_CORE_H #define DYAD_CORE_DYAD_CORE_H -// Includes #include "dyad_envs.h" #include "dyad_rc.h" +// Includes #include "dyad_flux_log.h" -#include "dyad_dtl_defs.h" +#include "dyad_dtl.h" #ifdef __cplusplus #include @@ -33,22 +33,22 @@ extern "C" { * @struct dyad_ctx */ struct dyad_ctx { - flux_t* h; // the Flux handle for DYAD - dyad_dtl_t *dtl_handle; // Opaque handle to DTL info - bool debug; // if true, perform debug logging - bool check; // if true, perform some check logging - bool reenter; // if false, do 
not recursively enter DYAD - bool initialized; // if true, DYAD is initialized - bool shared_storage; // if true, the managed path is shared - bool sync_started; // TODO - unsigned int key_depth; // Depth of bins for the Flux KVS - unsigned int key_bins; // Number of bins for the Flux KVS - uint32_t rank; // Flux rank for DYAD - char* kvs_namespace; // Flux KVS namespace for DYAD - char* prod_managed_path; // producer path managed by DYAD - char* cons_managed_path; // consumer path managed by DYAD + flux_t* h; // the Flux handle for DYAD + struct dyad_dtl *dtl_handle; // Opaque handle to DTL info + bool debug; // if true, perform debug logging + bool check; // if true, perform some check logging + bool reenter; // if false, do not recursively enter DYAD + bool initialized; // if true, DYAD is initialized + bool shared_storage; // if true, the managed path is shared + bool sync_started; // TODO + unsigned int key_depth; // Depth of bins for the Flux KVS + unsigned int key_bins; // Number of bins for the Flux KVS + uint32_t rank; // Flux rank for DYAD + char* kvs_namespace; // Flux KVS namespace for DYAD + char* prod_managed_path; // producer path managed by DYAD + char* cons_managed_path; // consumer path managed by DYAD }; -extern const struct dyad_ctx dyad_ctx_default; +DYAD_DLL_EXPORTED extern const struct dyad_ctx dyad_ctx_default; typedef struct dyad_ctx dyad_ctx_t; struct dyad_kvs_response { @@ -93,7 +93,7 @@ typedef struct dyad_kvs_response dyad_kvs_response_t; * * @return An integer error code (values TBD) */ -dyad_rc_t dyad_init (bool debug, +DYAD_DLL_EXPORTED dyad_rc_t dyad_init (bool debug, bool check, bool shared_storage, unsigned int key_depth, @@ -109,7 +109,7 @@ dyad_rc_t dyad_init (bool debug, * @param[out] ctx the newly initialized context * @return An error code */ -dyad_rc_t dyad_init_env (dyad_ctx_t** ctx); +DYAD_DLL_EXPORTED dyad_rc_t dyad_init_env (dyad_ctx_t** ctx); /** * @brief Wrapper function that performs all the common tasks needed @@ 
-122,7 +122,7 @@ dyad_rc_t dyad_init_env (dyad_ctx_t** ctx); #if DYAD_PERFFLOW __attribute__ ((annotate ("@critical_path()"))) #endif -dyad_rc_t dyad_produce (dyad_ctx_t* ctx, const char* fname); +DYAD_DLL_EXPORTED dyad_rc_t dyad_produce (dyad_ctx_t* ctx, const char* fname); /** * @brief Wrapper function that performs all the common tasks needed @@ -135,7 +135,7 @@ dyad_rc_t dyad_produce (dyad_ctx_t* ctx, const char* fname); #if DYAD_PERFFLOW __attribute__ ((annotate ("@critical_path()"))) #endif -dyad_rc_t dyad_consume (dyad_ctx_t* ctx, const char* fname); +DYAD_DLL_EXPORTED dyad_rc_t dyad_consume (dyad_ctx_t* ctx, const char* fname); /** * @brief Finalizes the DYAD instance and deallocates the context @@ -143,10 +143,10 @@ dyad_rc_t dyad_consume (dyad_ctx_t* ctx, const char* fname); * * @return An integer error code (values TBD) */ -dyad_rc_t dyad_finalize (dyad_ctx_t** ctx); +DYAD_DLL_EXPORTED dyad_rc_t dyad_finalize (dyad_ctx_t** ctx); #if DYAD_SYNC_DIR -int dyad_sync_directory (dyad_ctx_t* ctx, const char* path); +DYAD_DLL_EXPORTED int dyad_sync_directory (dyad_ctx_t* ctx, const char* path); #endif #ifdef __cplusplus diff --git a/src/core/dyad_core.sym b/src/core/dyad_core.sym deleted file mode 100644 index 68e72d86..00000000 --- a/src/core/dyad_core.sym +++ /dev/null @@ -1,6 +0,0 @@ -dyad_init -dyad_init_env -dyad_consume -dyad_produce -dyad_finalize -dyad_ctx_default diff --git a/src/core/dyad_dtl_defs.h b/src/core/dyad_dtl_defs.h deleted file mode 100644 index 93421925..00000000 --- a/src/core/dyad_dtl_defs.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef __DYAD_DTL_DEFS_H__ -#define __DYAD_DTL_DEFS_H__ - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct dyad_dtl dyad_dtl_t; - -enum dyad_dtl_mode { - DYAD_DTL_FLUX_RPC = 0, - DYAD_DTL_UCX = 1, - // TODO Figure out how to use Flux RPC - // as a fallback if other DTLs fail - // DYAD_DTL_UCX_W_FALLBACK, -}; - -typedef enum dyad_dtl_mode dyad_dtl_mode_t; - -#ifdef __cplusplus -} -#endif - -#endif /* 
__DYAD_DTL_DEFS_H__ */ diff --git a/src/dtl/Makefile.am b/src/dtl/Makefile.am new file mode 100644 index 00000000..287e8158 --- /dev/null +++ b/src/dtl/Makefile.am @@ -0,0 +1,25 @@ +noinst_LTLIBRARIES = libdyad_dtl.la +libdyad_dtl_la_SOURCES = \ + dyad_dtl_impl.c \ + dyad_dtl_impl.h \ + dyad_rc.h \ + flux_dtl.c \ + flux_dtl.h \ + ucx_dtl.c \ + ucx_dtl.h +libdyad_dtl_la_LIBADD = \ + $(top_builddir)/src/utils/libutils.la \ + $(top_builddir)/src/utils/libmurmur3.la \ + $(UCX_LIBS) \ + $(JANSSON_LIBS) \ + $(FLUX_CORE_LIBS) +libdyad_dtl_la_CFLAGS = \ + $(AM_CFLAGS) \ + -I$(top_srcdir)/src/utils \ + -I$(top_srcdir)/src/utils/base64 \ + $(UCX_CFLAGS) \ + $(JANSSON_CFLAGS) \ + $(FLUX_CORE_CLFAGS) \ + -fvisibility=hidden + +include_HEADERS = dyad_rc.h dyad_flux_log.h dyad_dtl.h \ No newline at end of file diff --git a/src/dtl/dyad_dtl.h b/src/dtl/dyad_dtl.h new file mode 100644 index 00000000..0223536e --- /dev/null +++ b/src/dtl/dyad_dtl.h @@ -0,0 +1,20 @@ +#ifndef DYAD_DTL_H +#define DYAD_DTL_H + +#ifdef __cplusplus +extern "C" { +#endif + +enum dyad_dtl_mode { + DYAD_DTL_UCX = 0, + DYAD_DTL_FLUX_RPC = 1, +}; +typedef enum dyad_dtl_mode dyad_dtl_mode_t; + +struct dyad_dtl; + +#ifdef __cplusplus +} +#endif + +#endif /* DYAD_DTL_H */ \ No newline at end of file diff --git a/src/dtl/dyad_dtl_impl.c b/src/dtl/dyad_dtl_impl.c new file mode 100644 index 00000000..6b394bfc --- /dev/null +++ b/src/dtl/dyad_dtl_impl.c @@ -0,0 +1,82 @@ +#include "dyad_dtl_impl.h" + +#include "ucx_dtl.h" +#include "flux_dtl.h" + +dyad_rc_t dyad_dtl_init(dyad_dtl_t **dtl_handle, + dyad_dtl_mode_t mode, flux_t *h, bool debug) +{ + dyad_rc_t rc = DYAD_RC_OK; + *dtl_handle = malloc (sizeof (struct dyad_dtl)); + if (*dtl_handle == NULL) { + return DYAD_RC_SYSFAIL; + } + (*dtl_handle)->mode = mode; + if (mode == DYAD_DTL_UCX) { + rc = dyad_dtl_ucx_init ( + &((*dtl_handle)->private.ucx_dtl_handle), + mode, + h, + debug + ); + if (DYAD_IS_ERROR(rc)) { + return rc; + } + (*dtl_handle)->rpc_pack = 
dyad_dtl_ucx_rpc_pack; + (*dtl_handle)->rpc_unpack = dyad_dtl_ucx_rpc_unpack; + (*dtl_handle)->rpc_respond = dyad_dtl_ucx_rpc_respond; + (*dtl_handle)->rpc_recv_response = dyad_dtl_ucx_rpc_recv_response; + (*dtl_handle)->establish_connection = dyad_dtl_ucx_establish_connection; + (*dtl_handle)->send = dyad_dtl_ucx_send; + (*dtl_handle)->recv = dyad_dtl_ucx_recv; + (*dtl_handle)->close_connection = dyad_dtl_ucx_close_connection; + return DYAD_RC_OK; + } else if (mode == DYAD_DTL_FLUX_RPC) { + rc = dyad_dtl_flux_init ( + &((*dtl_handle)->private.flux_dtl_handle), + mode, + h, + debug + ); + if (DYAD_IS_ERROR(rc)) { + return rc; + } + (*dtl_handle)->rpc_pack = dyad_dtl_flux_rpc_pack; + (*dtl_handle)->rpc_unpack = dyad_dtl_flux_rpc_unpack; + (*dtl_handle)->rpc_respond = dyad_dtl_flux_rpc_respond; + (*dtl_handle)->rpc_recv_response = dyad_dtl_flux_rpc_recv_response; + (*dtl_handle)->establish_connection = dyad_dtl_flux_establish_connection; + (*dtl_handle)->send = dyad_dtl_flux_send; + (*dtl_handle)->recv = dyad_dtl_flux_recv; + (*dtl_handle)->close_connection = dyad_dtl_flux_close_connection; + return DYAD_RC_OK; + } + return DYAD_RC_BADDTLMODE; +} + +dyad_rc_t dyad_dtl_finalize(dyad_dtl_t **dtl_handle) +{ + dyad_rc_t rc = DYAD_RC_OK; + if (dtl_handle == NULL || *dtl_handle == NULL) + return DYAD_RC_OK; + if ((*dtl_handle)->mode == DYAD_DTL_UCX) { + if ((*dtl_handle)->private.ucx_dtl_handle != NULL) { + rc = dyad_dtl_ucx_finalize (dtl_handle); + if (DYAD_IS_ERROR(rc)) { + return rc; + } + } + } else if ((*dtl_handle)->mode == DYAD_DTL_FLUX_RPC) { + if ((*dtl_handle)->private.flux_dtl_handle != NULL) { + rc = dyad_dtl_flux_finalize (dtl_handle); + if (DYAD_IS_ERROR(rc)) { + return rc; + } + } + } else { + return DYAD_RC_BADDTLMODE; + } + free (*dtl_handle); + *dtl_handle = NULL; + return DYAD_RC_OK; +} diff --git a/src/dtl/dyad_dtl_impl.h b/src/dtl/dyad_dtl_impl.h new file mode 100644 index 00000000..513a20e2 --- /dev/null +++ b/src/dtl/dyad_dtl_impl.h @@ -0,0 +1,64 @@ 
+#ifndef DYAD_DTL_IMPL_H +#define DYAD_DTL_IMPL_H + +#include "dyad_rc.h" +#include "dyad_flux_log.h" + +#include +#include + +#include "dyad_dtl.h" + +#ifdef __cplusplus +#include + +extern "C" { +#else +#include +#endif + +// Forward declarations of DTL contexts for the underlying implementations +struct dyad_dtl_ucx; +struct dyad_dtl_flux; + +// Union type to store the underlying DTL contexts +union dyad_dtl_private { + struct dyad_dtl_ucx* ucx_dtl_handle; + struct dyad_dtl_flux* flux_dtl_handle; +}; +typedef union dyad_dtl_private dyad_dtl_private_t; + +enum dyad_dtl_comm_mode { + DYAD_COMM_NONE = 0, // Sanity check value for when connection isn't established + DYAD_COMM_RECV = 1, // DTL connection will only receive data + DYAD_COMM_SEND = 2, // DTL connection will only send data +}; +typedef enum dyad_dtl_comm_mode dyad_dtl_comm_mode_t; + +struct dyad_dtl { + dyad_dtl_private_t private; + dyad_dtl_mode_t mode; + dyad_rc_t (*rpc_pack)(struct dyad_dtl* self, const char* upath, + uint32_t producer_rank, json_t** packed_obj); + dyad_rc_t (*rpc_unpack)(struct dyad_dtl* self, + const flux_msg_t* packed_obj, char** upath); + dyad_rc_t (*rpc_respond)(struct dyad_dtl* self, const flux_msg_t* orig_msg); + dyad_rc_t (*rpc_recv_response)(struct dyad_dtl* self, flux_future_t* f); + dyad_rc_t (*establish_connection)(struct dyad_dtl* self, + dyad_dtl_comm_mode_t comm_mode); + dyad_rc_t (*send)(struct dyad_dtl* self, void* buf, size_t buflen); + dyad_rc_t (*recv)(struct dyad_dtl* self, void** buf, size_t* buflen); + dyad_rc_t (*close_connection)(struct dyad_dtl* self); +}; +typedef struct dyad_dtl dyad_dtl_t; + +dyad_rc_t dyad_dtl_init(dyad_dtl_t **dtl_handle, + dyad_dtl_mode_t mode, flux_t *h, bool debug); + +dyad_rc_t dyad_dtl_finalize(dyad_dtl_t **dtl_handle); + +#ifdef __cplusplus +} +#endif + +#endif /* DYAD_DTL_IMPL_H */ diff --git a/src/core/dyad_flux_log.h b/src/dtl/dyad_flux_log.h similarity index 100% rename from src/core/dyad_flux_log.h rename to 
src/dtl/dyad_flux_log.h diff --git a/src/core/dyad_rc.h b/src/dtl/dyad_rc.h similarity index 92% rename from src/core/dyad_rc.h rename to src/dtl/dyad_rc.h index a9034281..0a028b2b 100644 --- a/src/core/dyad_rc.h +++ b/src/dtl/dyad_rc.h @@ -1,6 +1,12 @@ #ifndef DYAD_CORE_DYAD_RC_H #define DYAD_CORE_DYAD_RC_H +#if BUILDING_DYAD +#define DYAD_DLL_EXPORTED __attribute__((__visibility__("default"))) +#else +#define DYAD_DLL_EXPORTED +#endif + enum dyad_core_return_codes { DYAD_RC_OK = 0, // Operation worked correctly DYAD_RC_SYSFAIL = -1, // Some sys call or C standard diff --git a/src/dtl/flux_dtl.c b/src/dtl/flux_dtl.c new file mode 100644 index 00000000..4e5064ab --- /dev/null +++ b/src/dtl/flux_dtl.c @@ -0,0 +1,144 @@ +#include "flux_dtl.h" + +dyad_rc_t dyad_dtl_flux_init (dyad_dtl_flux_t** dtl_handle, + dyad_dtl_mode_t mode, flux_t* h, bool debug) +{ + *dtl_handle = malloc(sizeof(struct dyad_dtl_flux)); + if (*dtl_handle == NULL) { + FLUX_LOG_ERR (h, "Cannot allocate the Flux DTL handle\n"); + return DYAD_RC_SYSFAIL; + } + (*dtl_handle)->h = h; + (*dtl_handle)->debug = debug; + (*dtl_handle)->f = NULL; + (*dtl_handle)->msg = NULL; + return DYAD_RC_OK; +} + +dyad_rc_t dyad_dtl_flux_rpc_pack (dyad_dtl_t* self, const char* upath, + uint32_t producer_rank, json_t** packed_obj) +{ + dyad_dtl_flux_t* dtl_handle = self->private.flux_dtl_handle; + *packed_obj = json_pack( + "{s:s}", + "upath", + upath + ); + if (*packed_obj == NULL) { + FLUX_LOG_ERR (dtl_handle->h, "Could not pack upath for Flux DTL\n"); + return DYAD_RC_BADPACK; + } + return DYAD_RC_OK; +} + +dyad_rc_t dyad_dtl_flux_rpc_unpack (dyad_dtl_t* self, + const flux_msg_t* msg, char** upath) +{ + int errcode = 0; + errcode = flux_request_unpack ( + msg, + NULL, + "{s:s}", + "upath", + upath + ); + if (errcode < 0) { + FLUX_LOG_ERR ( + self->private.flux_dtl_handle->h, + "Could not unpack Flux message from consumer\n" + ); + // TODO create new RC for this + return -1; + } + self->private.flux_dtl_handle->msg = 
msg; + return DYAD_RC_OK; +} + +dyad_rc_t dyad_dtl_flux_rpc_respond (dyad_dtl_t* self, + const flux_msg_t* orig_msg) +{ + return DYAD_RC_OK; +} + +dyad_rc_t dyad_dtl_flux_rpc_recv_response (dyad_dtl_t* self, flux_future_t* f) +{ + self->private.flux_dtl_handle->f = f; + return DYAD_RC_OK; +} + +dyad_rc_t dyad_dtl_flux_establish_connection (dyad_dtl_t* self, + dyad_dtl_comm_mode_t comm_mode) +{ + return DYAD_RC_OK; +} + +dyad_rc_t dyad_dtl_flux_send (dyad_dtl_t* self, void* buf, size_t buflen) +{ + int errcode = 0; + FLUX_LOG_INFO ( + self->private.flux_dtl_handle->h, + "Send data to consumer using a Flux RPC response" + ); + errcode = flux_respond_raw ( + self->private.flux_dtl_handle->h, + self->private.flux_dtl_handle->msg, + buf, + (int) buflen + ); + if (errcode < 0) { + FLUX_LOG_ERR ( + self->private.flux_dtl_handle->h, + "Could not send Flux RPC response containing file contents\n" + ); + return DYAD_RC_FLUXFAIL; + } + if (self->private.flux_dtl_handle->debug) { + FLUX_LOG_INFO ( + self->private.flux_dtl_handle->h, + "Successfully sent file contents to consumer\n" + ); + } + return DYAD_RC_OK; +} + +dyad_rc_t dyad_dtl_flux_recv (dyad_dtl_t* self, void** buf, size_t* buflen) +{ + int rc = 0; + errno = 0; + dyad_dtl_flux_t* dtl_handle = self->private.flux_dtl_handle; + FLUX_LOG_INFO (dtl_handle->h, "Get file contents from module using Flux RPC\n"); + if (dtl_handle->f == NULL) { + FLUX_LOG_ERR (dtl_handle->h, "Cannot get data using RPC without a Flux future\n"); + // TODO create new RC for this + return -1; + } + rc = flux_rpc_get_raw(dtl_handle->f, (const void**) buf, (int*) buflen); + if (rc < 0) { + FLUX_LOG_ERR (dtl_handle->h, "Could not get file data from Flux RPC\n"); + if (errno == ENODATA) + return DYAD_RC_RPC_FINISHED; + return DYAD_RC_BADRPC; + } + return DYAD_RC_OK; +} + +dyad_rc_t dyad_dtl_flux_close_connection (dyad_dtl_t* self) +{ + if (self->private.flux_dtl_handle->f != NULL) + self->private.flux_dtl_handle->f = NULL; + if 
(self->private.flux_dtl_handle->msg != NULL) + self->private.flux_dtl_handle->msg = NULL; + return DYAD_RC_OK; +} + +dyad_rc_t dyad_dtl_flux_finalize (dyad_dtl_t** self) +{ + if (self == NULL || *self == NULL) + return DYAD_RC_OK; + (*self)->private.flux_dtl_handle->h = NULL; + (*self)->private.flux_dtl_handle->f = NULL; + (*self)->private.flux_dtl_handle->msg = NULL; + free((*self)->private.flux_dtl_handle); + (*self)->private.flux_dtl_handle = NULL; + return DYAD_RC_OK; +} \ No newline at end of file diff --git a/src/dtl/flux_dtl.h b/src/dtl/flux_dtl.h new file mode 100644 index 00000000..6af5665c --- /dev/null +++ b/src/dtl/flux_dtl.h @@ -0,0 +1,41 @@ +#ifndef DYAD_DTL_FLUX_H +#define DYAD_DTL_FLUX_H + +#include "dyad_dtl_impl.h" + +#include + +struct dyad_dtl_flux { + flux_t* h; + bool debug; + flux_future_t *f; + flux_msg_t* msg; +}; + +typedef struct dyad_dtl_flux dyad_dtl_flux_t; + +dyad_rc_t dyad_dtl_flux_init (dyad_dtl_flux_t** dtl_handle, + dyad_dtl_mode_t mode, flux_t* h, bool debug); + +dyad_rc_t dyad_dtl_flux_rpc_pack (dyad_dtl_t* self, const char* upath, + uint32_t producer_rank, json_t** packed_obj); + +dyad_rc_t dyad_dtl_flux_rpc_unpack (dyad_dtl_t* self, + const flux_msg_t* msg, char** upath); + +dyad_rc_t dyad_dtl_flux_rpc_respond (dyad_dtl_t* self, const flux_msg_t* orig_msg); + +dyad_rc_t dyad_dtl_flux_rpc_recv_response (dyad_dtl_t* self, flux_future_t* f); + +dyad_rc_t dyad_dtl_flux_establish_connection (dyad_dtl_t* self, + dyad_dtl_comm_mode_t comm_mode); + +dyad_rc_t dyad_dtl_flux_send (dyad_dtl_t* self, void* buf, size_t buflen); + +dyad_rc_t dyad_dtl_flux_recv (dyad_dtl_t* self, void** buf, size_t* buflen); + +dyad_rc_t dyad_dtl_flux_close_connection (dyad_dtl_t* self); + +dyad_rc_t dyad_dtl_flux_finalize (dyad_dtl_t** self); + +#endif /* DYAD_DTL_FLUX_H */ \ No newline at end of file diff --git a/src/core/dtl/ucx_dtl.c b/src/dtl/ucx_dtl.c similarity index 54% rename from src/core/dtl/ucx_dtl.c rename to src/dtl/ucx_dtl.c index 
2dbbdff7..54bcb1ad 100644 --- a/src/core/dtl/ucx_dtl.c +++ b/src/dtl/ucx_dtl.c @@ -1,20 +1,11 @@ #include "ucx_dtl.h" -#include "dyad_rc.h" - #include "base64.h" -#ifdef __cplusplus -#include -#include -#include -#include -#else #include #include #include #include -#endif extern const base64_maps_t base64_maps_rfc4648; @@ -23,7 +14,7 @@ extern const base64_maps_t base64_maps_rfc4648; // Macro function used to simplify checking the status // of UCX operations -#define UCX_STATUS_FAIL(status) status != UCS_OK +#define UCX_STATUS_FAIL(status) (status != UCS_OK) // Define a request struct to be used in handling // async UCX operations @@ -46,23 +37,34 @@ static void dyad_ucx_request_init(void *request) #if UCP_API_VERSION >= UCP_VERSION(1, 10) static void dyad_recv_callback(void *request, ucs_status_t status, const ucp_tag_recv_info_t *tag_info, void *user_data) -{ - dyad_ucx_request_t *real_request = NULL; - real_request = (dyad_ucx_request_t*) request; - real_request->completed = 1; -} #else static void dyad_recv_callback(void *request, ucs_status_t status, ucp_tag_recv_info_t *tag_info) +#endif { dyad_ucx_request_t *real_request = NULL; real_request = (dyad_ucx_request_t*) request; real_request->completed = 1; } + +#if UCP_API_VERSION >= UCP_VERSION(1, 10) +static void dyad_send_callback(void *req, ucs_status_t status, void *ctx) +#else +static void dyad_send_callback(void *req, ucs_status_t status) #endif +{ + dyad_ucx_request_t *real_req = (dyad_ucx_request_t*)req; + real_req->completed = 1; +} + +void dyad_ucx_ep_err_handler (void* arg, ucp_ep_h ep, ucs_status_t status) { + flux_t* h = (flux_t*) arg; + FLUX_LOG_ERR (h, "An error occured on the UCP endpoint (status = %d)\n", status); +} // Simple function used to wait on the async receive -static ucs_status_t dyad_ucx_request_wait(dyad_dtl_ucx_t *dtl_handle, dyad_ucx_request_t *request) +static ucs_status_t dyad_ucx_request_wait(dyad_dtl_ucx_t *dtl_handle, + dyad_ucx_request_t *request) { ucs_status_t 
final_request_status = UCS_OK; // If 'request' is actually a request handle, this means the communication operation @@ -98,8 +100,36 @@ static ucs_status_t dyad_ucx_request_wait(dyad_dtl_ucx_t *dtl_handle, dyad_ucx_r return UCS_OK; } -dyad_rc_t dyad_dtl_ucx_init(flux_t *h, const char *kvs_namespace, - bool debug, dyad_dtl_ucx_t **dtl_handle) +static inline dyad_rc_t dyad_dtl_ucx_finalize_impl (dyad_dtl_ucx_t **dtl_handle) +{ + // Release consumer address if not already released + if ((*dtl_handle)->consumer_address != NULL) { + ucp_worker_release_address( + (*dtl_handle)->ucx_worker, + (*dtl_handle)->consumer_address + ); + (*dtl_handle)->consumer_address = NULL; + } + // Release worker if not already released + if ((*dtl_handle)->ucx_worker != NULL) { + ucp_worker_destroy((*dtl_handle)->ucx_worker); + (*dtl_handle)->ucx_worker = NULL; + } + // Release context if not already released + if ((*dtl_handle)->ucx_ctx != NULL) { + ucp_cleanup((*dtl_handle)->ucx_ctx); + (*dtl_handle)->ucx_ctx = NULL; + } + // Flux handle should be released by the + // DYAD context, so it is not released here + (*dtl_handle)->h = NULL; + // Free the handle and set to NULL to prevent double free + free(*dtl_handle); + return DYAD_RC_OK; +} + +dyad_rc_t dyad_dtl_ucx_init(dyad_dtl_ucx_t** dtl_handle, dyad_dtl_mode_t mode, + flux_t *h, bool debug) { ucp_params_t ucx_params; ucp_worker_params_t worker_params; @@ -108,27 +138,26 @@ dyad_rc_t dyad_dtl_ucx_init(flux_t *h, const char *kvs_namespace, ucp_worker_attr_t worker_attrs; *dtl_handle = malloc(sizeof(struct dyad_dtl_ucx)); - if (*dtl_handle == NULL) - { + if (*dtl_handle == NULL) { FLUX_LOG_ERR (h, "Could not allocate UCX DTL context\n"); return DYAD_RC_SYSFAIL; } // Allocation/Freeing of the Flux handle should be // handled by the DYAD context (*dtl_handle)->h = h; - // Allocation/Freeing of kvs_namespace should be - // handled by the DYAD context - (*dtl_handle)->kvs_namespace = kvs_namespace; + (*dtl_handle)->debug = debug; 
(*dtl_handle)->ucx_ctx = NULL; (*dtl_handle)->ucx_worker = NULL; + (*dtl_handle)->ep = NULL; + (*dtl_handle)->curr_comm_mode = DYAD_COMM_NONE; (*dtl_handle)->consumer_address = NULL; (*dtl_handle)->addr_len = 0; + (*dtl_handle)->comm_tag = 0; // Read the UCX configuration FLUX_LOG_INFO ((*dtl_handle)->h, "Reading UCP config\n"); status = ucp_config_read (NULL, NULL, &config); - if (UCX_STATUS_FAIL(status)) - { + if (UCX_STATUS_FAIL(status)) { FLUX_LOG_ERR ((*dtl_handle)->h, "Could not read the UCX config\n"); goto error; } @@ -156,8 +185,7 @@ dyad_rc_t dyad_dtl_ucx_init(flux_t *h, const char *kvs_namespace, status = ucp_init(&ucx_params, config, &(*dtl_handle)->ucx_ctx); // If in debug mode, print the configuration of UCX to stderr - if (debug) - { + if (debug) { ucp_config_print( config, stderr, @@ -168,8 +196,7 @@ dyad_rc_t dyad_dtl_ucx_init(flux_t *h, const char *kvs_namespace, // Release the config ucp_config_release(config); // Log an error if UCX initialization failed - if (UCX_STATUS_FAIL(status)) - { + if (UCX_STATUS_FAIL(status)) { FLUX_LOG_ERR (h, "ucp_init failed (status = %d)\n", status); goto error; } @@ -192,8 +219,7 @@ dyad_rc_t dyad_dtl_ucx_init(flux_t *h, const char *kvs_namespace, &worker_params, &(*dtl_handle)->ucx_worker ); - if (UCX_STATUS_FAIL(status)) - { + if (UCX_STATUS_FAIL(status)) { FLUX_LOG_ERR (h, "ucp_worker_create failed (status = %d)!\n", status); goto error; } @@ -205,8 +231,7 @@ dyad_rc_t dyad_dtl_ucx_init(flux_t *h, const char *kvs_namespace, (*dtl_handle)->ucx_worker, &worker_attrs ); - if (UCX_STATUS_FAIL(status)) - { + if (UCX_STATUS_FAIL(status)) { FLUX_LOG_ERR (h, "Cannot get UCX worker address (status = %d)!\n", status); goto error; } @@ -218,18 +243,18 @@ dyad_rc_t dyad_dtl_ucx_init(flux_t *h, const char *kvs_namespace, error:; // If an error occured, finalize the DTL handle and // return a failing error code - // dyad_dtl_ucx_finalize(*dtl_handle); + dyad_dtl_ucx_finalize_impl(dtl_handle); return DYAD_RC_UCXINIT_FAIL; } 
-dyad_rc_t dyad_dtl_ucx_rpc_pack(dyad_dtl_ucx_t *dtl_handle, const char *upath, +dyad_rc_t dyad_dtl_ucx_rpc_pack(dyad_dtl_t *self, const char *upath, uint32_t producer_rank, json_t **packed_obj) { size_t enc_len = 0; char* enc_buf = NULL; ssize_t enc_size = 0; - if (dtl_handle->consumer_address == NULL) - { + dyad_dtl_ucx_t* dtl_handle = self->private.ucx_dtl_handle; + if (dtl_handle->consumer_address == NULL) { // TODO log error return DYAD_RC_BADPACK; } @@ -238,8 +263,7 @@ dyad_rc_t dyad_dtl_ucx_rpc_pack(dyad_dtl_ucx_t *dtl_handle, const char *upath, // Add 1 to encoded length because the encoded buffer will be // packed as if it is a string enc_buf = malloc(enc_len+1); - if (enc_buf == NULL) - { + if (enc_buf == NULL) { FLUX_LOG_ERR (dtl_handle->h, "Could not allocate buffer for packed address\n"); return DYAD_RC_SYSFAIL; } @@ -249,8 +273,7 @@ dyad_rc_t dyad_dtl_ucx_rpc_pack(dyad_dtl_ucx_t *dtl_handle, const char *upath, enc_size = base64_encode_using_maps(&base64_maps_rfc4648, enc_buf, enc_len+1, (const char*)dtl_handle->consumer_address, dtl_handle->addr_len); - if (enc_size < 0) - { + if (enc_size < 0) { // TODO log error free(enc_buf); return DYAD_RC_BADPACK; @@ -261,8 +284,7 @@ dyad_rc_t dyad_dtl_ucx_rpc_pack(dyad_dtl_ucx_t *dtl_handle, const char *upath, // Instead, we use this function to create the tag that will be // used for the upcoming communication. 
uint32_t consumer_rank = 0; - if (flux_get_rank(dtl_handle->h, &consumer_rank) < 0) - { + if (flux_get_rank(dtl_handle->h, &consumer_rank) < 0) { FLUX_LOG_ERR (dtl_handle->h, "Cannot get consumer rank\n"); return DYAD_RC_FLUXFAIL; } @@ -286,35 +308,172 @@ dyad_rc_t dyad_dtl_ucx_rpc_pack(dyad_dtl_ucx_t *dtl_handle, const char *upath, ); free(enc_buf); // If the packing failed, log an error - if (*packed_obj == NULL) - { + if (*packed_obj == NULL) { FLUX_LOG_ERR (dtl_handle->h, "Could not pack upath and UCX address for RPC\n"); return DYAD_RC_BADPACK; } return DYAD_RC_OK; } -dyad_rc_t dyad_dtl_ucx_recv_rpc_response(dyad_dtl_ucx_t *dtl_handle, +dyad_rc_t dyad_dtl_ucx_rpc_unpack (dyad_dtl_t* self, const flux_msg_t* msg, char** upath) +{ + char* enc_addr = NULL; + size_t enc_addr_len = 0; + int errcode = 0; + uint32_t tag_prod = 0; + uint32_t tag_cons = 0; + ssize_t decoded_len = 0; + dyad_dtl_ucx_t* dtl_handle = self->private.ucx_dtl_handle; + FLUX_LOG_INFO (dtl_handle->h, "Unpacking RPC payload\n"); + errcode = flux_request_unpack( + msg, + NULL, + "{s:s, s:i, s:i, s:s%}", + "upath", + upath, + "tag_prod", + &tag_prod, + "tag_cons", + &tag_cons, + "ucx_addr", + &enc_addr, + &enc_addr_len + ); + if (errcode < 0) + { + FLUX_LOG_ERR(dtl_handle->h, "Could not unpack Flux message from consumer!\n"); + return -1; + } + dtl_handle->comm_tag = ((uint64_t)tag_prod << 32) | (uint64_t)tag_cons; + FLUX_LOG_INFO (dtl_handle->h, "Obtained upath from RPC payload: %s\n", upath); + FLUX_LOG_INFO (dtl_handle->h, "Obtained UCP tag from RPC payload: %lu\n", dtl_handle->comm_tag); + FLUX_LOG_INFO (dtl_handle->h, "Decoding consumer UCP address using base64\n"); + dtl_handle->addr_len = base64_decoded_length(enc_addr_len); + dtl_handle->consumer_address = (ucp_address_t*) malloc(dtl_handle->addr_len); + decoded_len = base64_decode_using_maps (&base64_maps_rfc4648, + (char*)dtl_handle->consumer_address, dtl_handle->addr_len, + enc_addr, enc_addr_len); + if (decoded_len < 0) + { + // TODO log 
error + free(dtl_handle->consumer_address); + dtl_handle->consumer_address = NULL; + dtl_handle->addr_len = 0; + return -1; + } + return DYAD_RC_OK; +} + +dyad_rc_t dyad_dtl_ucx_rpc_respond (dyad_dtl_t* self, const flux_msg_t* orig_msg) +{ + return DYAD_RC_OK; +} + +dyad_rc_t dyad_dtl_ucx_rpc_recv_response(dyad_dtl_t *self, flux_future_t *f) { return DYAD_RC_OK; } -dyad_rc_t dyad_dtl_ucx_establish_connection(dyad_dtl_ucx_t *dtl_handle) +dyad_rc_t dyad_dtl_ucx_establish_connection(dyad_dtl_t *self, + dyad_dtl_comm_mode_t comm_mode) { + ucp_ep_params_t params; + ucs_status_t status = UCS_OK; + dyad_dtl_ucx_t* dtl_handle = self->private.ucx_dtl_handle; + dtl_handle->curr_comm_mode = comm_mode; + if (comm_mode == DYAD_COMM_SEND) { + params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS | + UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE | + UCP_EP_PARAM_FIELD_ERR_HANDLER; + params.address = dtl_handle->consumer_address; + params.err_mode = UCP_ERR_HANDLING_MODE_PEER; + params.err_handler.cb = dyad_ucx_ep_err_handler; + params.err_handler.arg = (void*) dtl_handle->h; + FLUX_LOG_INFO (dtl_handle->h, "Create UCP endpoint for communication with consumer\n"); + status = ucp_ep_create( + dtl_handle->ucx_worker, + ¶ms, + &dtl_handle->ep + ); + if (status != UCS_OK) + { + FLUX_LOG_ERR (dtl_handle->h, "ucp_ep_create failed with status %d\n", (int) status); + return -1; + } + if (dtl_handle->debug) + { + ucp_ep_print_info(dtl_handle->ep, stderr); + } + return DYAD_RC_OK; + } else if (comm_mode == DYAD_COMM_RECV) { + FLUX_LOG_INFO (dtl_handle->h, "No explicit connection establishment needed for UCX receiver\n"); + return DYAD_RC_OK; + } else { + FLUX_LOG_ERR (dtl_handle->h, "Invalid communication mode: %d\n", comm_mode); + // TODO create new RC for this + return -1; + } +} + +dyad_rc_t dyad_dtl_ucx_send (dyad_dtl_t* self, void* buf, size_t buflen) +{ + ucs_status_ptr_t stat_ptr; + ucs_status_t status = UCS_OK; + dyad_ucx_request_t *req = NULL; + dyad_dtl_ucx_t* dtl_handle = 
self->private.ucx_dtl_handle; + if (dtl_handle->ep == NULL) + { + FLUX_LOG_INFO(dtl_handle->h, "UCP endpoint was not created prior to invoking send!\n"); + return DYAD_RC_UCXCOMM_FAIL; + } + // ucp_tag_send_sync_nbx is the prefered version of this send since UCX 1.9 + // However, some systems (e.g., Lassen) may have an older verison + // This conditional compilation will use ucp_tag_send_sync_nbx if using UCX 1.9+, + // and it will use the deprecated ucp_tag_send_sync_nb if using UCX < 1.9. +#if UCP_API_VERSION >= UCP_VERSION(1, 10) + ucp_request_param_t params; + params.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK; + params.cb.send = dyad_send_callback; + FLUX_LOG_INFO (dtl_handle->h, "Sending data to consumer with ucp_tag_send_nbx\n"); + stat_ptr = ucp_tag_send_nbx( + dtl_handle->ep, + buf, + buflen, + dtl_handle->comm_tag, + ¶ms + ); +#else + FLUX_LOG_INFO (dtl_handle->h, "Sending %lu bytes of data to consumer with ucp_tag_send_nb\n", buflen); + stat_ptr = ucp_tag_send_nb( + dtl_handle->ep, + buf, + buflen, + UCP_DATATYPE_CONTIG, + dtl_handle->comm_tag, + dyad_send_callback + ); +#endif + FLUX_LOG_INFO (dtl_handle->h, "Processing UCP send request\n"); + status = dyad_ucx_request_wait (dtl_handle, stat_ptr); + if (status != UCS_OK) + { + FLUX_LOG_ERR (dtl_handle->h, "UCP Tag Send failed (status = %d)!\n", (int)status); + return DYAD_RC_UCXCOMM_FAIL; + } + FLUX_LOG_INFO (dtl_handle->h, "Data send with UCP succeeded\n"); return DYAD_RC_OK; } -dyad_rc_t dyad_dtl_ucx_recv(dyad_dtl_ucx_t *dtl_handle, - void **buf, size_t *buflen) +dyad_rc_t dyad_dtl_ucx_recv(dyad_dtl_t *self, void **buf, size_t *buflen) { - ucs_status_t status; + ucs_status_t status = UCS_OK; ucp_tag_message_h msg = NULL; ucp_tag_recv_info_t msg_info; dyad_ucx_request_t* req = NULL; - // Use 'ucp_worker_wait' to poll the worker until - // the tag recv event that we're looking for comes in. 
+ dyad_dtl_ucx_t* dtl_handle = self->private.ucx_dtl_handle; FLUX_LOG_INFO (dtl_handle->h, "Poll UCP for incoming data\n"); + // TODO replace this loop with a resiliency response over RPC do { ucp_worker_progress (dtl_handle->ucx_worker); msg = ucp_tag_probe_nb( @@ -436,52 +595,101 @@ dyad_rc_t dyad_dtl_ucx_recv(dyad_dtl_ucx_t *dtl_handle, return DYAD_RC_OK; } -dyad_rc_t dyad_dtl_ucx_close_connection(dyad_dtl_ucx_t *dtl_handle) -{ - // Since we're using tag send/recv, there's no need - // to explicitly close the connection. So, all we're - // doing here is setting the tag back to 0 (which cannot - // be valid for DYAD because DYAD won't send a file from - // one node to the same node). - dtl_handle->comm_tag = 0; - return DYAD_RC_OK; -} - -dyad_rc_t dyad_dtl_ucx_finalize(dyad_dtl_ucx_t *dtl_handle) +dyad_rc_t dyad_dtl_ucx_close_connection(dyad_dtl_t *self) { - if (dtl_handle != NULL) - { - FLUX_LOG_INFO (dtl_handle->h, "Finalizing UCX DTL\n"); - // KVS namespace string should be released by the - // DYAD context, so it is not released here - dtl_handle->kvs_namespace = NULL; - // Release consumer address if not already released - if (dtl_handle->consumer_address != NULL) + ucs_status_t status = UCS_OK; + ucs_status_ptr_t stat_ptr; + dyad_dtl_ucx_t* dtl_handle = self->private.ucx_dtl_handle; + if (dtl_handle->curr_comm_mode == DYAD_COMM_SEND) { + if (dtl_handle != NULL) { - ucp_worker_release_address( - dtl_handle->ucx_worker, - dtl_handle->consumer_address - ); - dtl_handle->consumer_address = NULL; - } - // Release worker if not already released - if (dtl_handle->ucx_worker != NULL) - { - ucp_worker_destroy(dtl_handle->ucx_worker); - dtl_handle->ucx_worker = NULL; - } - // Release context if not already released - if (dtl_handle->ucx_ctx != NULL) - { - ucp_cleanup(dtl_handle->ucx_ctx); - dtl_handle->ucx_ctx = NULL; + if (dtl_handle->ep != NULL) + { + // ucp_tag_send_sync_nbx is the prefered version of this send since UCX 1.9 + // However, some systems (e.g., 
Lassen) may have an older verison + // This conditional compilation will use ucp_tag_send_sync_nbx if using UCX 1.9+, + // and it will use the deprecated ucp_tag_send_sync_nb if using UCX < 1.9. + FLUX_LOG_INFO (dtl_handle->h, "Start async closing of UCP endpoint\n"); +#if UCP_API_VERSION >= UCP_VERSION(1, 10) + ucp_request_param_t close_params; + close_params.op_attr_mask = UCP_OP_ATTR_FIELD_FLAGS; + close_params.flags = UCP_EP_CLOSE_FLAG_FORCE; + stat_ptr = ucp_ep_close_nbx(dtl_handle->ep, &close_params); +#else + // TODO change to FORCE if we decide to enable err handleing mode + stat_ptr = ucp_ep_close_nb(dtl_handle->ep, UCP_EP_CLOSE_MODE_FORCE); +#endif + FLUX_LOG_INFO (dtl_handle->h, "Wait for endpoint closing to finish\n"); + // Don't use dyad_ucx_request_wait here because ep_close behaves + // differently than other UCX calls + if (stat_ptr != NULL) + { + // Endpoint close is in-progress. + // Wait until finished + if (UCS_PTR_IS_PTR(stat_ptr)) + { + do { + ucp_worker_progress(dtl_handle->ucx_worker); + status = ucp_request_check_status(stat_ptr); + } while (status == UCS_INPROGRESS); + ucp_request_free(stat_ptr); + } + // An error occurred during endpoint closure + // However, the endpoint can no longer be used + // Get the status code for reporting + else + { + status = UCS_PTR_STATUS(stat_ptr); + } + if (status != UCS_OK) + { + FLUX_LOG_ERR(dtl_handle->h, "Could not successfully close Endpoint (status = %d)! 
However, endpoint was released.\n", status); + } + } + dtl_handle->ep = NULL; + } + // Sender doesn't have a consumer address at this time + // So, free the consumer address when closing the connection + if (dtl_handle->consumer_address != NULL) + { + free(dtl_handle->consumer_address); + dtl_handle->consumer_address = NULL; + dtl_handle->addr_len = 0; + } + dtl_handle->comm_tag = 0; } - // Flux handle should be released by the - // DYAD context, so it is not released here - dtl_handle->h = NULL; - // Free the handle and set to NULL to prevent double free - free(dtl_handle); - dtl_handle = NULL; + FLUX_LOG_INFO (dtl_handle->h, "UCP endpoint close successful\n"); + return DYAD_RC_OK; + } else if (dtl_handle->curr_comm_mode == DYAD_COMM_RECV) { + // Since we're using tag send/recv, there's no need + // to explicitly close the connection. So, all we're + // doing here is setting the tag back to 0 (which cannot + // be valid for DYAD because DYAD won't send a file from + // one node to the same node). 
+ dtl_handle->comm_tag = 0; + return DYAD_RC_OK; + } else { + FLUX_LOG_ERR (dtl_handle->h, "Somehow, an invalid comm mode reached 'close_connection'\n"); + // TODO create new RC for this case + return -1; } - return DYAD_RC_OK; } + +dyad_rc_t dyad_dtl_ucx_finalize(dyad_dtl_t **self) +{ + dyad_dtl_ucx_t* dtl_handle = NULL; + dyad_rc_t rc = DYAD_RC_OK; + if (self == NULL || *self == NULL || + (*self)->private.ucx_dtl_handle == NULL) { + return DYAD_RC_OK; + } + dtl_handle = (*self)->private.ucx_dtl_handle; + FLUX_LOG_INFO (dtl_handle->h, "Finalizing UCX DTL\n"); + if (dtl_handle->ep != NULL) { + dyad_dtl_ucx_close_connection (*self); + dtl_handle->ep = NULL; + } + rc = dyad_dtl_ucx_finalize_impl (&dtl_handle); + (*self)->private.ucx_dtl_handle = NULL; + return rc; +} \ No newline at end of file diff --git a/src/dtl/ucx_dtl.h b/src/dtl/ucx_dtl.h new file mode 100644 index 00000000..9b92e5b6 --- /dev/null +++ b/src/dtl/ucx_dtl.h @@ -0,0 +1,48 @@ +#ifndef DYAD_DTL_UCX_H +#define DYAD_DTL_UCX_H + +#include "dyad_dtl_impl.h" + +#include + +#include + +struct dyad_dtl_ucx { + flux_t *h; + bool debug; + ucp_context_h ucx_ctx; + ucp_worker_h ucx_worker; + ucp_ep_h ep; + dyad_dtl_comm_mode_t curr_comm_mode; + ucp_address_t *consumer_address; + size_t addr_len; + ucp_tag_t comm_tag; +}; + +typedef struct dyad_dtl_ucx dyad_dtl_ucx_t; + +dyad_rc_t dyad_dtl_ucx_init (dyad_dtl_ucx_t **dtl_handle, + dyad_dtl_mode_t mode, flux_t *h, bool debug); + +dyad_rc_t dyad_dtl_ucx_rpc_pack (dyad_dtl_t* self, const char* upath, + uint32_t producer_rank, json_t** packed_obj); + +dyad_rc_t dyad_dtl_ucx_rpc_unpack (dyad_dtl_t* self, + const flux_msg_t* msg, char** upath); + +dyad_rc_t dyad_dtl_ucx_rpc_respond (dyad_dtl_t* self, const flux_msg_t* orig_msg); + +dyad_rc_t dyad_dtl_ucx_rpc_recv_response (dyad_dtl_t* self, flux_future_t* f); + +dyad_rc_t dyad_dtl_ucx_establish_connection (dyad_dtl_t *self, + dyad_dtl_comm_mode_t comm_mode); + +dyad_rc_t dyad_dtl_ucx_send (dyad_dtl_t* self, void* buf, 
size_t buflen); + +dyad_rc_t dyad_dtl_ucx_recv (dyad_dtl_t* self, void** buf, size_t* buflen); + +dyad_rc_t dyad_dtl_ucx_close_connection (dyad_dtl_t* self); + +dyad_rc_t dyad_dtl_ucx_finalize (dyad_dtl_t **self); + +#endif /* DYAD_DTL_UCX_H */ \ No newline at end of file diff --git a/src/modules/Makefile.am b/src/modules/Makefile.am index d384eefc..d0a8ee27 100644 --- a/src/modules/Makefile.am +++ b/src/modules/Makefile.am @@ -1,11 +1,5 @@ lib_LTLIBRARIES = dyad.la dyad_la_SOURCES = \ - dtl/dyad_mod_dtl.c \ - dtl/dyad_mod_dtl.h \ - dtl/flux_mod_dtl.c \ - dtl/flux_mod_dtl.h \ - dtl/ucx_mod_dtl.c \ - dtl/ucx_mod_dtl.h \ dyad.c \ dyad_flux_log.h dyad_la_LDFLAGS = \ @@ -13,24 +7,30 @@ dyad_la_LDFLAGS = \ -module \ -avoid-version \ -no-undefined \ + --disable-static \ -shared \ - -export-symbols-regex '^mod_(name|main)$$' \ + -export-dynamic \ -Wl,-rpath,'$(UCX_LIBDIR)' dyad_la_LIBADD = \ - $(top_builddir)/src/utils/libutils.la \ + $(top_builddir)/src/dtl/libdyad_dtl.la \ $(UCX_LIBS) \ $(JANSSON_LIBS) \ $(FLUX_CORE_LIBS) -dyad_la_CPPFLAGS = \ - $(AM_CPPFLAGS) \ +dyad_la_CFLAGS = \ + $(AM_CFLAGS) \ -I$(top_srcdir)/src/utils \ -I$(top_srcdir)/src/utils/base64 \ + -I$(top_srcdir)/src/dtl \ $(UCX_CFLAGS) \ $(JANSSON_CFLAGS) \ - $(FLUX_CORE_CFLAGS) + $(FLUX_CORE_CFLAGS) \ + -DBUILDING_DYAD \ + -fvisibility=hidden +dyad_la_CPPFLAGS = if PERFFLOW dyad_la_LIBADD += $(PERFFLOW_LIBS) -dyad_la_CPPFLAGS += $(PERFFLOW_PLUGIN_CPPFLAGS) $(PERFFLOW_CFLAGS) -DDYAD_PERFFLOW=1 +dyad_la_CFLAGS += $(PERFFLOW_CFLAGS) -DDYAD_PERFFLOW=1 +dyad_la_CPPFLAGS += $(PERFFLOW_PLUGIN_CPPFLAGS) endif if URPC @@ -44,10 +44,12 @@ urpc_la_LIBADD = $(top_builddir)/src/utils/libutils.la $(FLUX_CORE_LIBS) $(JANSS # urpc_la_LIBS = -lb64 # TODO replace current CPPFLAGS rule with this one once libb64 is required # urpc_la_CPPFLAGS = $(FLUX_CORE_CFLAGS) $(JANSSON_CFLAGS) -I$(LIBB64_DIR)/include -I$(top_builddir)/src/common -urpc_la_CPPFLAGS = $(FLUX_CORE_CFLAGS) $(JANSSON_CFLAGS) -I$(top_builddir)/src/utils 
+urpc_la_CFLAGS = $(FLUX_CORE_CFLAGS) $(JANSSON_CFLAGS) -I$(top_builddir)/src/utils +urpc_la_CPPFLAGS = if PERFFLOW urpc_la_LIBADD += $(PERFFLOW_LIBS) -urpc_la_CPPFLAGS += $(PERFFLOW_PLUGIN_CPPFLAGS) $(PERFFLOW_CFLAGS) -DDYAD_PERFFLOW=1 +urpc_la_CFLAGS += $(PERFFLOW_CFLAGS) -DDYAD_PERFFLOW=1 +urpc_la_CPPFLAGS += $(PERFFLOW_PLUGIN_CPPFLAGS) endif endif diff --git a/src/modules/dtl/dyad_mod_dtl.c b/src/modules/dtl/dyad_mod_dtl.c deleted file mode 100644 index 7fa9300c..00000000 --- a/src/modules/dtl/dyad_mod_dtl.c +++ /dev/null @@ -1,169 +0,0 @@ -#include "dyad_mod_dtl.h" - -#include "ucx_mod_dtl.h" -#include "flux_mod_dtl.h" - -struct dyad_mod_dtl { - dyad_mod_dtl_mode_t mode; - flux_t *h; - void *real_handle; -}; - -int dyad_mod_dtl_init(dyad_mod_dtl_mode_t mode, - flux_t *h, bool debug, - dyad_mod_dtl_t **dtl_handle) -{ - *dtl_handle = malloc(sizeof(struct dyad_mod_dtl)); - if (*dtl_handle == NULL) - { - FLUX_LOG_ERR (h, "Could not allocate a dyad_mode_dtl_t object\n"); - return -1; - } - (*dtl_handle)->mode = mode; - (*dtl_handle)->h = h; - if (mode == DYAD_DTL_UCX) - { - FLUX_LOG_INFO (h, "Initializing UCX DTL!\n"); - return dyad_mod_ucx_dtl_init( - h, - debug, - (dyad_mod_ucx_dtl_t**)&(*dtl_handle)->real_handle - ); - } - if (mode == DYAD_DTL_FLUX_RPC) - { - FLUX_LOG_INFO (h, "Initializing Flux RPC DTL!\n"); - return dyad_mod_flux_dtl_init( - h, - debug, - (dyad_mod_flux_dtl_t**)&(*dtl_handle)->real_handle - ); - } - FLUX_LOG_ERR (h, "Invalid DYAD DTL mode: %d\n", (int) mode); - return -1; -} - -int dyad_mod_dtl_rpc_unpack(dyad_mod_dtl_t *dtl_handle, - const flux_msg_t *packed_obj, char **upath) -{ - if (dtl_handle->mode == DYAD_DTL_UCX) - { - return dyad_mod_ucx_dtl_rpc_unpack( - (dyad_mod_ucx_dtl_t*)dtl_handle->real_handle, - packed_obj, - upath - ); - } - if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) - { - return dyad_mod_flux_dtl_rpc_unpack( - (dyad_mod_flux_dtl_t*)dtl_handle->real_handle, - packed_obj, - upath - ); - } - FLUX_LOG_ERR (dtl_handle->h, "Invalid 
DYAD DTL mode: %d\n", (int) dtl_handle->mode); - return -1; -} - -int dyad_mod_dtl_rpc_respond(dyad_mod_dtl_t *dtl_handle, const flux_msg_t *orig_msg) -{ - if (dtl_handle->mode == DYAD_DTL_UCX) - { - return dyad_mod_ucx_dtl_rpc_respond( - (dyad_mod_ucx_dtl_t*)dtl_handle->real_handle, - orig_msg - ); - } - if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) - { - return dyad_mod_flux_dtl_rpc_respond( - (dyad_mod_flux_dtl_t*)dtl_handle->real_handle, - orig_msg - ); - } - FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL mode: %d\n", (int) dtl_handle->mode); - return -1; -} - -int dyad_mod_dtl_establish_connection(dyad_mod_dtl_t *dtl_handle) -{ - if (dtl_handle->mode == DYAD_DTL_UCX) - { - return dyad_mod_ucx_dtl_establish_connection( - (dyad_mod_ucx_dtl_t*)dtl_handle->real_handle - ); - } - if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) - { - return dyad_mod_flux_dtl_establish_connection( - (dyad_mod_flux_dtl_t*)dtl_handle->real_handle - ); - } - FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL mode: %d\n", (int) dtl_handle->mode); - return -1; -} - -int dyad_mod_dtl_send(dyad_mod_dtl_t *dtl_handle, void *buf, size_t buflen) -{ - if (dtl_handle->mode == DYAD_DTL_UCX) - { - return dyad_mod_ucx_dtl_send( - (dyad_mod_ucx_dtl_t*)dtl_handle->real_handle, - buf, - buflen - ); - } - if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) - { - return dyad_mod_flux_dtl_send( - (dyad_mod_flux_dtl_t*)dtl_handle->real_handle, - buf, - buflen - ); - } - FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL mode: %d\n", (int) dtl_handle->mode); - return -1; -} - -int dyad_mod_dtl_close_connection(dyad_mod_dtl_t *dtl_handle) -{ - if (dtl_handle->mode == DYAD_DTL_UCX) - { - return dyad_mod_ucx_dtl_close_connection( - (dyad_mod_ucx_dtl_t*)dtl_handle->real_handle - ); - } - if (dtl_handle->mode == DYAD_DTL_FLUX_RPC) - { - return dyad_mod_flux_dtl_close_connection( - (dyad_mod_flux_dtl_t*)dtl_handle->real_handle - ); - } - FLUX_LOG_ERR (dtl_handle->h, "Invalid DYAD DTL mode: %d\n", (int) dtl_handle->mode); - return -1; -} 
- -int dyad_mod_dtl_finalize(dyad_mod_dtl_t **dtl_handle) -{ - if (dtl_handle == NULL || *dtl_handle == NULL) - return 0; - dyad_mod_dtl_mode_t mode = (*dtl_handle)->mode;; - flux_t *h = (*dtl_handle)->h; - void* real_handle = (*dtl_handle)->real_handle; - free(*dtl_handle); - if (mode == DYAD_DTL_UCX) - { - return dyad_mod_ucx_dtl_finalize( - (dyad_mod_ucx_dtl_t*)real_handle - ); - } - if (mode == DYAD_DTL_FLUX_RPC) - { - return dyad_mod_flux_dtl_finalize( - (dyad_mod_flux_dtl_t*)real_handle - ); - } - FLUX_LOG_ERR (h, "Invalid DYAD DTL mode: %d\n", (int) mode); - return -1; -} diff --git a/src/modules/dtl/dyad_mod_dtl.h b/src/modules/dtl/dyad_mod_dtl.h deleted file mode 100644 index 3e68a785..00000000 --- a/src/modules/dtl/dyad_mod_dtl.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef __DYAD_MOD_DTL_H__ -#define __DYAD_MOD_DTL_H__ - -#include - -#if defined(__cplusplus) -#include -#else -#include -#include -#endif /* defined(__cplusplus) */ - -typedef struct dyad_mod_dtl dyad_mod_dtl_t; - -enum dyad_mod_dtl_mode { - DYAD_DTL_FLUX_RPC, - DYAD_DTL_UCX, - // TODO Figure out how to use Flux RPC - // as a fallback for if UCX fails - // DYAD_DTL_UCX_W_FALLBACK, -}; - -typedef enum dyad_mod_dtl_mode dyad_mod_dtl_mode_t; - -int dyad_mod_dtl_init(dyad_mod_dtl_mode_t mode, flux_t *h, - bool debug, dyad_mod_dtl_t **dtl_handle); - -int dyad_mod_dtl_rpc_unpack(dyad_mod_dtl_t *dtl_handle, - const flux_msg_t *packed_obj, char **upath); - -int dyad_mod_dtl_rpc_respond(dyad_mod_dtl_t *dtl_handle, - const flux_msg_t *orig_msg); - -int dyad_mod_dtl_establish_connection(dyad_mod_dtl_t *dtl_handle); - -int dyad_mod_dtl_send(dyad_mod_dtl_t *dtl_handle, void *buf, size_t buflen); - -int dyad_mod_dtl_close_connection(dyad_mod_dtl_t *dtl_handle); - -int dyad_mod_dtl_finalize(dyad_mod_dtl_t **dtl_handle); - -#endif /* __DYAD_MOD_DTL_H__ */ diff --git a/src/modules/dtl/flux_mod_dtl.c b/src/modules/dtl/flux_mod_dtl.c deleted file mode 100644 index b8222374..00000000 --- 
a/src/modules/dtl/flux_mod_dtl.c +++ /dev/null @@ -1,82 +0,0 @@ -#include "flux_mod_dtl.h" - -int dyad_mod_flux_dtl_init(flux_t *h, bool debug, - dyad_mod_flux_dtl_t **dtl_handle) -{ - *dtl_handle = malloc(sizeof(struct dyad_mod_flux_dtl)); - if (*dtl_handle == NULL) - { - FLUX_LOG_ERR(h, "Cannot allocate a context for the Flux RPC DTL\n"); - return -1; - } - (*dtl_handle)->h = h; - (*dtl_handle)->debug = debug; - (*dtl_handle)->msg = NULL; - return 0; -} - -int dyad_mod_flux_dtl_rpc_unpack(dyad_mod_flux_dtl_t *dtl_handle, - const flux_msg_t *packed_obj, char **upath) -{ - int errcode = flux_request_unpack( - packed_obj, - NULL, - "{s:s}", - "upath", - upath - ); - if (errcode < 0) - { - FLUX_LOG_ERR(dtl_handle->h, "Could not unpack Flux message from consumer!\n"); - return -1; - } - // Save the flux_msg_t object here instead of dyad_mod_flux_dtl_rpc_respond - // to increase the odds that the compiler will optimize rpc_respond away - dtl_handle->msg = packed_obj; - return 0; -} - -int dyad_mod_flux_dtl_rpc_respond (dyad_mod_flux_dtl_t *dtl_handle, - const flux_msg_t *orig_msg) -{ - return 0; -} - -int dyad_mod_flux_dtl_establish_connection(dyad_mod_flux_dtl_t *dtl_handle) -{ - return 0; -} - -int dyad_mod_flux_dtl_send(dyad_mod_flux_dtl_t *dtl_handle, void *buf, size_t buflen) -{ - FLUX_LOG_INFO (dtl_handle->h, "Send data to consumer using a Flux RPC response"); - int errcode = flux_respond_raw(dtl_handle->h, dtl_handle->msg, buf, (int)buflen); - if (errcode < 0) - { - FLUX_LOG_ERR(dtl_handle->h, "Could not send Flux RPC response containing file contents!\n"); - return -1; - } - if (dtl_handle->debug) - { - FLUX_LOG_INFO(dtl_handle->h, "Successfully sent file contents to consumer!\n"); - } - return 0; -} - -int dyad_mod_flux_dtl_close_connection(dyad_mod_flux_dtl_t *dtl_handle) -{ - if (dtl_handle != NULL) - { - dtl_handle->msg = NULL; - } - return 0; -} - -int dyad_mod_flux_dtl_finalize(dyad_mod_flux_dtl_t *dtl_handle) -{ - if (dtl_handle != NULL) - { - 
free(dtl_handle); - } - return 0; -} diff --git a/src/modules/dtl/flux_mod_dtl.h b/src/modules/dtl/flux_mod_dtl.h deleted file mode 100644 index 2a3ecdf5..00000000 --- a/src/modules/dtl/flux_mod_dtl.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef __DYAD_MOD_FLUX_DTL_H__ -#define __DYAD_MOD_FLUX_DTL_H__ - -#include "dyad_flux_log.h" - -#ifdef __cplusplus -#include -#else -#include -#include -#endif - -struct dyad_mod_flux_dtl { - flux_t *h; - bool debug; - const flux_msg_t *msg; -}; - -typedef struct dyad_mod_flux_dtl dyad_mod_flux_dtl_t; - -int dyad_mod_flux_dtl_init(flux_t *h, bool debug, - dyad_mod_flux_dtl_t **dtl_handle); - -int dyad_mod_flux_dtl_rpc_unpack(dyad_mod_flux_dtl_t *dtl_handle, - const flux_msg_t *packed_obj, char **upath); - -int dyad_mod_flux_dtl_rpc_respond (dyad_mod_flux_dtl_t *dtl_handle, - const flux_msg_t *orig_msg); - -int dyad_mod_flux_dtl_establish_connection(dyad_mod_flux_dtl_t *dtl_handle); - -int dyad_mod_flux_dtl_send(dyad_mod_flux_dtl_t *dtl_handle, void *buf, size_t buflen); - -int dyad_mod_flux_dtl_close_connection(dyad_mod_flux_dtl_t *dtl_handle); - -int dyad_mod_flux_dtl_finalize(dyad_mod_flux_dtl_t *dtl_handle); - -#endif /* __DYAD_MOD_FLUX_DTL_H__ */ diff --git a/src/modules/dtl/ucx_mod_dtl.c b/src/modules/dtl/ucx_mod_dtl.c deleted file mode 100644 index ce8f28fa..00000000 --- a/src/modules/dtl/ucx_mod_dtl.c +++ /dev/null @@ -1,348 +0,0 @@ -#include "ucx_mod_dtl.h" - -#include "base64.h" - -// Get base64_maps_rfc4648 from flux-core -extern const base64_maps_t base64_maps_rfc4648; - -#define UCX_CHECK(status_code) status_code != UCS_OK - -#if !defined(UCP_API_VERSION) -#error Due to UCP API changes, we must be able to determine the version of UCP! \ - Please use a version of UCX with the UCP_API_VERSION macro defined! 
-#endif - -struct mod_request { - int completed; -}; -typedef struct mod_request mod_request_t; - -static void dyad_mod_ucx_request_init(void *request) -{ - mod_request_t *real_request = (mod_request_t*)request; - real_request->completed = 0; -} - -#if UCP_API_VERSION >= UCP_VERSION(1, 10) -static void dyad_ucx_send_handler(void *req, ucs_status_t status, void *ctx) -#else -static void dyad_ucx_send_handler(void *req, ucs_status_t status) -#endif -{ - mod_request_t *real_req = (mod_request_t*)req; - real_req->completed = 1; -} - -void dyad_mod_ucx_ep_err_handler (void *arg, ucp_ep_h ep, ucs_status_t status) { - flux_t *h = (flux_t*)arg; - FLUX_LOG_ERR (h, "An error occured on the UCP endpoint (status = %d)\n", status); -} - -int dyad_mod_ucx_dtl_init(flux_t *h, bool debug, dyad_mod_ucx_dtl_t **dtl_handle) -{ - ucp_params_t ucp_params; - ucp_worker_params_t worker_params; - ucp_config_t *config = NULL; - ucs_status_t status = UCS_OK; - *dtl_handle = malloc(sizeof(struct dyad_mod_ucx_dtl)); - (*dtl_handle)->h = h; - (*dtl_handle)->debug = debug; - (*dtl_handle)->ucx_ctx = NULL; - (*dtl_handle)->ucx_worker = NULL; - (*dtl_handle)->curr_ep = NULL; - (*dtl_handle)->curr_cons_addr = NULL; - (*dtl_handle)->addr_len = 0; - (*dtl_handle)->curr_comm_tag = 0; - FLUX_LOG_INFO (h, "Reading UCP config for DTL\n"); - status = ucp_config_read(NULL, NULL, &config); - if (UCX_CHECK(status)) - { - FLUX_LOG_ERR(h, "Could not read UCP config for data transport!\n"); - goto ucx_init_error; - } - ucp_params.field_mask = UCP_PARAM_FIELD_FEATURES | - UCP_PARAM_FIELD_REQUEST_SIZE | - UCP_PARAM_FIELD_REQUEST_INIT; - ucp_params.features = UCP_FEATURE_TAG | - // UCP_FEATURE_RMA | - UCP_FEATURE_WAKEUP; - ucp_params.request_size = sizeof(struct mod_request); - ucp_params.request_init = dyad_mod_ucx_request_init; - FLUX_LOG_INFO (h, "Initializing UCX\n"); - status = ucp_init(&ucp_params, config, &((*dtl_handle)->ucx_ctx)); - if (debug) - { - ucp_config_print( - config, - stderr, - "UCX 
Configuration", - UCS_CONFIG_PRINT_CONFIG - ); - } - ucp_config_release(config); - if (UCX_CHECK(status)) - { - FLUX_LOG_ERR(h, "Could not initialize UCX for data transport!\n"); - goto ucx_init_error; - } - // Flux modules are single-threaded, so enable single-thread mode in UCX - worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE | - UCP_WORKER_PARAM_FIELD_EVENTS; - worker_params.thread_mode = UCS_THREAD_MODE_SINGLE; - worker_params.events = UCP_WAKEUP_TAG_SEND; - FLUX_LOG_INFO (h, "Creating UCP worker\n"); - status = ucp_worker_create( - (*dtl_handle)->ucx_ctx, - &worker_params, - &(*dtl_handle)->ucx_worker - ); - if (UCX_CHECK(status)) - { - FLUX_LOG_ERR(h, "Could not create UCP worker for data transport!\n"); - goto ucx_init_error; - } - FLUX_LOG_INFO (h, "UCX initialization successful\n"); - return 0; - -ucx_init_error:; - dyad_mod_ucx_dtl_finalize(*dtl_handle); - return -1; -} - -int dyad_mod_ucx_dtl_rpc_unpack(dyad_mod_ucx_dtl_t *dtl_handle, - const flux_msg_t *packed_obj, char **upath) -{ - char* enc_addr = NULL; - size_t enc_addr_len = 0; - int errcode = 0; - uint32_t tag_prod = 0; - uint32_t tag_cons = 0; - ssize_t decoded_len = 0; - FLUX_LOG_INFO (dtl_handle->h, "Unpacking RPC payload\n"); - errcode = flux_request_unpack(packed_obj, - NULL, - "{s:s, s:i, s:i, s:s%}", - "upath", - upath, - "tag_prod", - &tag_prod, - "tag_cons", - &tag_cons, - "ucx_addr", - &enc_addr, - &enc_addr_len - ); - if (errcode < 0) - { - FLUX_LOG_ERR(dtl_handle->h, "Could not unpack Flux message from consumer!\n"); - return -1; - } - dtl_handle->curr_comm_tag = ((uint64_t)tag_prod << 32) | (uint64_t)tag_cons; - FLUX_LOG_INFO (dtl_handle->h, "Obtained upath from RPC payload: %s\n", upath); - FLUX_LOG_INFO (dtl_handle->h, "Obtained UCP tag from RPC payload: %lu\n", dtl_handle->curr_comm_tag); - FLUX_LOG_INFO (dtl_handle->h, "Decoding consumer UCP address using base64\n"); - dtl_handle->addr_len = base64_decoded_length(enc_addr_len); - dtl_handle->curr_cons_addr = 
(ucp_address_t*) malloc(dtl_handle->addr_len); - decoded_len = base64_decode_using_maps (&base64_maps_rfc4648, - (char*)dtl_handle->curr_cons_addr, dtl_handle->addr_len, - enc_addr, enc_addr_len); - if (decoded_len < 0) - { - // TODO log error - free(dtl_handle->curr_cons_addr); - dtl_handle->curr_cons_addr = NULL; - dtl_handle->addr_len = 0; - return -1; - } - return 0; -} - -int dyad_mod_ucx_dtl_rpc_respond (dyad_mod_ucx_dtl_t *dtl_handle, - const flux_msg_t *orig_msg) -{ - return 0; -} - -int dyad_mod_ucx_dtl_establish_connection(dyad_mod_ucx_dtl_t *dtl_handle) -{ - ucp_ep_params_t params; - ucs_status_t status = UCS_OK; - params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS | - UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE | - UCP_EP_PARAM_FIELD_ERR_HANDLER; - params.address = dtl_handle->curr_cons_addr; - params.err_mode = UCP_ERR_HANDLING_MODE_PEER; - params.err_handler.cb = dyad_mod_ucx_ep_err_handler; - params.err_handler.arg = (void*) dtl_handle->h; - FLUX_LOG_INFO (dtl_handle->h, "Create UCP endpoint for communication with consumer\n"); - status = ucp_ep_create( - dtl_handle->ucx_worker, - ¶ms, - &dtl_handle->curr_ep - ); - if (UCX_CHECK(status)) - { - FLUX_LOG_ERR (dtl_handle->h, "ucp_ep_create failed with status %d\n", (int) status); - return -1; - } - if (dtl_handle->debug) - { - ucp_ep_print_info(dtl_handle->curr_ep, stderr); - } - return 0; -} - -int dyad_mod_ucx_dtl_send(dyad_mod_ucx_dtl_t *dtl_handle, void *buf, size_t buflen) -{ - ucs_status_ptr_t stat_ptr; - ucs_status_t status = UCS_OK; - mod_request_t *req = NULL; - if (dtl_handle->curr_ep == NULL) - { - FLUX_LOG_INFO(dtl_handle->h, "UCP endpoint was not created prior to invoking send!\n"); - return 1; - } - // ucp_tag_send_sync_nbx is the prefered version of this send since UCX 1.9 - // However, some systems (e.g., Lassen) may have an older verison - // This conditional compilation will use ucp_tag_send_sync_nbx if using UCX 1.9+, - // and it will use the deprecated ucp_tag_send_sync_nb if using UCX < 
1.9. -#if UCP_API_VERSION >= UCP_VERSION(1, 10) - ucp_request_param_t params; - params.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK; - params.cb.send = dyad_ucx_send_handler; - FLUX_LOG_INFO (dtl_handle->h, "Sending data to consumer with ucp_tag_send_nbx\n"); - stat_ptr = ucp_tag_send_nbx( - dtl_handle->curr_ep, - buf, - buflen, - dtl_handle->curr_comm_tag, - ¶ms - ); -#else - FLUX_LOG_INFO (dtl_handle->h, "Sending %lu bytes of data to consumer with ucp_tag_send_nb\n", buflen); - stat_ptr = ucp_tag_send_nb( - dtl_handle->curr_ep, - buf, - buflen, - UCP_DATATYPE_CONTIG, - dtl_handle->curr_comm_tag, - dyad_ucx_send_handler - ); -#endif - FLUX_LOG_INFO (dtl_handle->h, "Processing UCP send request\n"); - if (UCS_PTR_IS_ERR(stat_ptr)) - { - FLUX_LOG_ERR (dtl_handle->h, "Error occured in UCP send\n"); - status = UCS_PTR_STATUS(stat_ptr); - } - else if (UCS_PTR_IS_PTR(stat_ptr)) - { - FLUX_LOG_INFO (dtl_handle->h, "Waiting for send to complete\n"); - req = (mod_request_t*)stat_ptr; - while (!req->completed) - { - ucp_worker_progress(dtl_handle->ucx_worker); - } - req->completed = 0; - status = ucp_request_check_status(req); - ucp_request_free(req); - } - else - { - FLUX_LOG_INFO (dtl_handle->h, "UCP send completed immediately\n"); - status = UCS_OK; - } - if (UCX_CHECK(status)) - { - FLUX_LOG_ERR (dtl_handle->h, "UCP Tag Send failed (status = %d)!\n", (int)status); - return -1; - } - FLUX_LOG_INFO (dtl_handle->h, "Data send with UCP succeeded\n"); - return 0; -} - -int dyad_mod_ucx_dtl_close_connection(dyad_mod_ucx_dtl_t *dtl_handle) -{ - ucs_status_t status = UCS_OK; - ucs_status_ptr_t stat_ptr; - if (dtl_handle != NULL) - { - if (dtl_handle->curr_ep != NULL) - { - // ucp_tag_send_sync_nbx is the prefered version of this send since UCX 1.9 - // However, some systems (e.g., Lassen) may have an older verison - // This conditional compilation will use ucp_tag_send_sync_nbx if using UCX 1.9+, - // and it will use the deprecated ucp_tag_send_sync_nb if using UCX < 1.9. 
- FLUX_LOG_INFO (dtl_handle->h, "Start async closing of UCP endpoint\n"); -#if UCP_API_VERSION >= UCP_VERSION(1, 10) - ucp_request_param_t close_params; - close_params.op_attr_mask = UCP_OP_ATTR_FIELD_FLAGS; - close_params.flags = UCP_EP_CLOSE_FLAG_FORCE; - stat_ptr = ucp_ep_close_nbx(dtl_handle->curr_ep, &close_params); -#else - // TODO change to FORCE if we decide to enable err handleing mode - stat_ptr = ucp_ep_close_nb(dtl_handle->curr_ep, UCP_EP_CLOSE_MODE_FORCE); -#endif - FLUX_LOG_INFO (dtl_handle->h, "Wait for endpoint closing to finish\n"); - if (stat_ptr != NULL) - { - // Endpoint close is in-progress. - // Wait until finished - if (UCS_PTR_IS_PTR(stat_ptr)) - { - do { - ucp_worker_progress(dtl_handle->ucx_worker); - status = ucp_request_check_status(stat_ptr); - } while (status == UCS_INPROGRESS); - ucp_request_free(stat_ptr); - } - // An error occurred during endpoint closure - // However, the endpoint can no longer be used - // Get the status code for reporting - else - { - status = UCS_PTR_STATUS(stat_ptr); - } - if (UCX_CHECK(status)) - { - FLUX_LOG_ERR(dtl_handle->h, "Could not successfully close Endpoint (status = %d)! 
However, endpoint was released.\n", status); - } - } - dtl_handle->curr_ep = NULL; - } - if (dtl_handle->curr_cons_addr != NULL) - { - free(dtl_handle->curr_cons_addr); - dtl_handle->curr_cons_addr = NULL; - dtl_handle->addr_len = 0; - } - dtl_handle->curr_comm_tag = 0; - } - FLUX_LOG_INFO (dtl_handle->h, "UCP endpoint close successful\n"); - return 0; -} - -int dyad_mod_ucx_dtl_finalize(dyad_mod_ucx_dtl_t *dtl_handle) -{ - if (dtl_handle != NULL) - { - if (dtl_handle->curr_cons_addr != NULL || dtl_handle->curr_ep != NULL) - { - dyad_mod_ucx_dtl_close_connection(dtl_handle); - } - if (dtl_handle->ucx_worker != NULL) - { - ucp_worker_destroy(dtl_handle->ucx_worker); - dtl_handle->ucx_worker = NULL; - } - if (dtl_handle->ucx_ctx != NULL) - { - ucp_cleanup(dtl_handle->ucx_ctx); - dtl_handle->ucx_ctx = NULL; - } - dtl_handle->h = NULL; - free(dtl_handle); - dtl_handle = NULL; - } - return 0; -} diff --git a/src/modules/dtl/ucx_mod_dtl.h b/src/modules/dtl/ucx_mod_dtl.h deleted file mode 100644 index f495ba04..00000000 --- a/src/modules/dtl/ucx_mod_dtl.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef __DYAD_MOD_UCX_DTL_H__ -#define __DYAD_MOD_UCX_DTL_H__ - -#include "dyad_flux_log.h" -#include - -#if defined(__cplusplus) -#include -#else -#include -#include -#endif - -struct dyad_mod_ucx_dtl { - flux_t *h; - bool debug; - ucp_context_h ucx_ctx; - ucp_worker_h ucx_worker; - ucp_ep_h curr_ep; - ucp_address_t *curr_cons_addr; - size_t addr_len; - ucp_tag_t curr_comm_tag; -}; - -typedef struct dyad_mod_ucx_dtl dyad_mod_ucx_dtl_t; - -int dyad_mod_ucx_dtl_init(flux_t *h, bool debug, dyad_mod_ucx_dtl_t **dtl_handle); - -int dyad_mod_ucx_dtl_rpc_unpack(dyad_mod_ucx_dtl_t *dtl_handle, - const flux_msg_t *packed_obj, char **upath); - -int dyad_mod_ucx_dtl_rpc_respond (dyad_mod_ucx_dtl_t *dtl_handle, - const flux_msg_t *orig_msg); - -int dyad_mod_ucx_dtl_establish_connection(dyad_mod_ucx_dtl_t *dtl_handle); - -int dyad_mod_ucx_dtl_send(dyad_mod_ucx_dtl_t *dtl_handle, void *buf, size_t 
buflen); - -int dyad_mod_ucx_dtl_close_connection(dyad_mod_ucx_dtl_t *dtl_handle); - -int dyad_mod_ucx_dtl_finalize(dyad_mod_ucx_dtl_t *dtl_handle); - -#endif /* __DYAD_MOD_UCX_DTL_H__ */ diff --git a/src/modules/dyad.c b/src/modules/dyad.c index 2c470190..628401da 100644 --- a/src/modules/dyad.c +++ b/src/modules/dyad.c @@ -30,9 +30,10 @@ #include #include -#include "dtl/dyad_mod_dtl.h" +#include "dyad_rc.h" #include "read_all.h" #include "utils.h" +#include "dyad_dtl_impl.h" #define TIME_DIFF(Tstart, Tend) \ ((double)(1000000000L * ((Tend).tv_sec - (Tstart).tv_sec) + (Tend).tv_nsec \ @@ -44,7 +45,7 @@ struct dyad_mod_ctx { bool debug; flux_msg_handler_t **handlers; const char *dyad_path; - dyad_mod_dtl_t *dtl_handle; + dyad_dtl_t *dtl_handle; }; const struct dyad_mod_ctx dyad_mod_ctx_default = { @@ -72,7 +73,7 @@ static void freectx (void *arg) dyad_mod_ctx_t *ctx = (dyad_mod_ctx_t *)arg; flux_msg_handler_delvec (ctx->handlers); if (ctx->dtl_handle != NULL) { - dyad_mod_dtl_finalize (&(ctx->dtl_handle)); + dyad_dtl_finalize (&(ctx->dtl_handle)); ctx->dtl_handle = NULL; } free (ctx); @@ -122,7 +123,7 @@ static void dyad_fetch_request_cb (flux_t *h, char *upath = NULL; char fullpath[PATH_MAX + 1] = {'\0'}; int saved_errno = errno; - int rc = 0; + dyad_rc_t rc = 0; if (!flux_msg_is_streaming (msg)) { errno = EPROTO; @@ -133,8 +134,14 @@ static void dyad_fetch_request_cb (flux_t *h, goto fetch_error; FLUX_LOG_INFO (h, "DYAD_MOD: unpacking RPC message"); + + rc = ctx->dtl_handle->rpc_unpack ( + ctx->dtl_handle, + msg, + &upath + ); - if (dyad_mod_dtl_rpc_unpack (ctx->dtl_handle, msg, &upath) < 0) { + if (DYAD_IS_ERROR(rc)) { FLUX_LOG_ERR (ctx->h, "Could not unpack message from client\n"); errno = EPROTO; goto fetch_error; @@ -143,7 +150,11 @@ static void dyad_fetch_request_cb (flux_t *h, FLUX_LOG_INFO (h, "DYAD_MOD: requested user_path: %s", upath); FLUX_LOG_INFO (h, "DYAD_MOD: sending initial response to consumer"); - if (dyad_mod_dtl_rpc_respond (ctx->dtl_handle, 
msg) < 0) { + rc = ctx->dtl_handle->rpc_respond ( + ctx->dtl_handle, + msg + ); + if (DYAD_IS_ERROR(rc)) { FLUX_LOG_ERR (ctx->h, "Could not send primary RPC response to client\n"); goto fetch_error; } @@ -173,17 +184,25 @@ static void dyad_fetch_request_cb (flux_t *h, FLUX_LOG_INFO (h, "Is inbuf NULL? -> %i\n", (int) (inbuf == NULL)); FLUX_LOG_INFO (h, "Establish DTL connection with consumer"); - if (dyad_mod_dtl_establish_connection (ctx->dtl_handle) < 0) { + rc = ctx->dtl_handle->establish_connection ( + ctx->dtl_handle, + DYAD_COMM_SEND + ); + if (DYAD_IS_ERROR(rc)) { FLUX_LOG_ERR (ctx->h, "Could not establish DTL connection with client\n"); errno = ECONNREFUSED; goto fetch_error; } FLUX_LOG_INFO (h, "Send file to consumer with DTL"); - rc = dyad_mod_dtl_send (ctx->dtl_handle, inbuf, inlen); + rc = ctx->dtl_handle->send ( + ctx->dtl_handle, + inbuf, + inlen + ); FLUX_LOG_INFO (h, "Close DTL connection with consumer"); - dyad_mod_dtl_close_connection (ctx->dtl_handle); + ctx->dtl_handle->close_connection (ctx->dtl_handle); free(inbuf); - if (rc < 0) { + if (DYAD_IS_ERROR(rc)) { FLUX_LOG_ERR (ctx->h, "Could not send data to client via DTL\n"); errno = ECOMM; goto fetch_error; @@ -206,18 +225,18 @@ static void dyad_fetch_request_cb (flux_t *h, return; } -static int dyad_open (flux_t *h, dyad_mod_dtl_mode_t dtl_mode, bool debug) +static dyad_rc_t dyad_open (flux_t *h, dyad_dtl_mode_t dtl_mode, bool debug) { dyad_mod_ctx_t *ctx = getctx (h); - int rc = 0; + dyad_rc_t rc = 0; char *e = NULL; ctx->debug = debug; - rc = dyad_mod_dtl_init ( + rc = dyad_dtl_init ( + &(ctx->dtl_handle), dtl_mode, h, - ctx->debug, - &(ctx->dtl_handle) + ctx->debug ); return rc; @@ -242,12 +261,12 @@ void usage() fprintf(stderr, " * --debug | -d: if provided, add debugging log messages\n"); } -int mod_main (flux_t *h, int argc, char **argv) +DYAD_DLL_EXPORTED int mod_main (flux_t *h, int argc, char **argv) { const mode_t m = (S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH | S_ISGID); dyad_mod_ctx_t 
*ctx = NULL; size_t flag_len = 0; - dyad_mod_dtl_mode_t dtl_mode = DYAD_DTL_UCX; + dyad_dtl_mode_t dtl_mode = DYAD_DTL_UCX; bool debug = false; if (!h) { @@ -290,7 +309,7 @@ int mod_main (flux_t *h, int argc, char **argv) } } - if (dyad_open (h, dtl_mode, debug) < 0) { + if (DYAD_IS_ERROR (dyad_open (h, dtl_mode, debug))) { FLUX_LOG_ERR (ctx->h, "dyad_open failed"); goto mod_error; } @@ -317,7 +336,7 @@ mod_done:; return EXIT_SUCCESS; } -MOD_NAME ("dyad"); +DYAD_DLL_EXPORTED MOD_NAME ("dyad"); /* * vi:tabstop=4 shiftwidth=4 expandtab diff --git a/src/stream/Makefile.am b/src/stream/Makefile.am index 22ac9ffe..a34ffab0 100644 --- a/src/stream/Makefile.am +++ b/src/stream/Makefile.am @@ -1,12 +1,23 @@ lib_LTLIBRARIES = libdyad_fstream.la libdyad_fstream_la_SOURCES = dyad_stream_core.cpp -libdyad_fstream_la_LDFLAGS = -Wl,-rpath,'$(UCX_LIBDIR)' $(AM_LDFLAGS) -avoid-version -no-undefined +libdyad_fstream_la_LDFLAGS = \ + -Wl,-rpath,'$(UCX_LIBDIR)' \ + $(AM_LDFLAGS) \ + -avoid-version \ + -no-undefined libdyad_fstream_la_LIBADD = $(top_builddir)/src/core/libdyad_core.la -libdyad_fstream_la_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_srcdir)/src/utils -I$(top_srcdir)/src/core $(FLUX_CORE_CFLAGS) +libdyad_fstream_la_CXXFLAGS = \ + $(AM_CFLAGS) \ + -I$(top_srcdir)/src/utils \ + -I$(top_srcdir)/src/core \ + -I$(top_srcdir)/src/dtl \ + $(FLUX_CORE_CFLAGS) +libdyad_fstream_la_CPPFLAGS = if PERFFLOW libdyad_fstream_la_LIBADD += $(PERFFLOW_LIBS) -libdyad_fstream_la_CPPFLAGS += $(PERFFLOW_PLUGIN_CPPFLAGS) $(PERFFLOW_CFLAGS) -DDYAD_PERFFLOW=1 +libdyad_fstream_la_CXXFLAGS += $(PERFFLOW_CFLAGS) -DDYAD_PERFFLOW=1 +libdyad_fstream_la_CPPFLAGS += $(PERFFLOW_PLUGIN_CPPFLAGS) endif include_HEADERS = dyad_params.hpp dyad_stream_core.hpp dyad_stream_api.hpp diff --git a/src/urpc/Makefile.am b/src/urpc/Makefile.am index 10fa200d..46e1958c 100644 --- a/src/urpc/Makefile.am +++ b/src/urpc/Makefile.am @@ -1,8 +1,10 @@ lib_LTLIBRARIES = liburpc_client.la liburpc_client_la_SOURCES = urpc_client.c 
urpc_client.hpp liburpc_client_la_LIBADD = $(FLUX_CORE_LIBS) $(top_builddir)/src/utils/libutils.la -liburpc_client_la_CPPFLAGS = $(FLUX_CORE_CFLAGS) -DURPC_CHECK=1 -I$(top_builddir)/src/utils +liburpc_client_la_CFLAGS = $(FLUX_CORE_CFLAGS) -DURPC_CHECK=1 -I$(top_builddir)/src/utils +liburpc_client_la_CPPFLAGS = if PERFFLOW liburpc_client_la_LIBADD += $(PERFFLOW_LIBS) -liburpc_client_la_CPPFLAGS += $(PERFFLOW_PLUGIN_CPPFLAGS) $(PERFFLOW_CFLAGS) -DDYAD_PERFFLOW=1 +liburpc_client_la_CFLAGS += $(PERFFLOW_CFLAGS) -DDYAD_PERFFLOW=1 +liburpc_client_la_CPPFLAGS += $(PERFFLOW_PLUGIN_CPPFLAGS) endif diff --git a/src/utils/Makefile.am b/src/utils/Makefile.am index 2ea4011c..ed069e66 100644 --- a/src/utils/Makefile.am +++ b/src/utils/Makefile.am @@ -2,10 +2,16 @@ SUBDIRS = libtap base64 noinst_LTLIBRARIES = libutils.la libmurmur3.la libutils_la_SOURCES = utils.c utils.h read_all.c read_all.h -libutils_la_CPPFLAGS = \ - $(AM_CPPFLAGS) \ - $(FLUX_CORE_CFLAGS) -libutils_la_LIBADD = $(top_builddir)/src/utils/base64/libbase64.la $(FLUX_CORE_LIBS) +libutils_la_CFLAGS = \ + $(AM_CFLAGS) \ + $(FLUX_CORE_CFLAGS) \ + -fvisibility=hidden +libutils_la_LIBADD = \ + $(top_builddir)/src/utils/base64/libbase64.la \ + $(FLUX_CORE_LIBS) libmurmur3_la_SOURCES = murmur3.c murmur3.h +libmurmur3_la_CFLAGS = \ + $(AM_CFLAGS) \ + -fvisibility=hidden #libmurmur3_la_CPPFLAGS = $(AM_CPPFLAGS) #libmurmur3_la_LIBADD = diff --git a/src/utils/base64/Makefile.am b/src/utils/base64/Makefile.am index 4b11dfd8..fde9ddaf 100644 --- a/src/utils/base64/Makefile.am +++ b/src/utils/base64/Makefile.am @@ -1,3 +1,6 @@ noinst_LTLIBRARIES = libbase64.la libbase64_la_SOURCES = base64.c base64.h +libbase64_la_CFLAGS = \ + $(AM_CFLAGS) \ + -fvisibility=hidden diff --git a/src/wrapper/Makefile.am b/src/wrapper/Makefile.am index ee02ecad..694ac490 100644 --- a/src/wrapper/Makefile.am +++ b/src/wrapper/Makefile.am @@ -7,9 +7,17 @@ dyad_wrapper_la_LDFLAGS = \ -avoid-version \ -no-undefined \ -shared \ - -export-symbols 
wrapper.sym -dyad_wrapper_la_LIBADD = $(top_builddir)/src/core/libdyad_core.la -dyad_wrapper_la_CPPFLAGS = -I$(top_srcdir)/src/utils -I$(top_srcdir)/src/core $(FLUX_CORE_CFLAGS) + # -export-symbols wrapper.sym +dyad_wrapper_la_LIBADD = \ + $(top_builddir)/src/utils/libutils.la \ + $(top_builddir)/src/core/libdyad_core.la +dyad_wrapper_la_CPPFLAGS = \ + -I$(top_srcdir)/src/utils \ + -I$(top_srcdir)/src/core \ + -I$(top_srcdir)/src/dtl \ + $(FLUX_CORE_CFLAGS) \ + -DBUILDING_DYAD \ + -fvisibility=hidden install-exec-hook: @(cd $(DESTDIR)$(libdir) && $(RM) dyad_wrapper.la) diff --git a/src/wrapper/wrapper.c b/src/wrapper/wrapper.c index 79c48e37..d146668c 100644 --- a/src/wrapper/wrapper.c +++ b/src/wrapper/wrapper.c @@ -47,7 +47,7 @@ extern "C" { #endif static __thread dyad_ctx_t *ctx = NULL; -static void dyad_wrapper_init (void) __attribute__((constructor)); +static void dyad_wrapper_init (void) __attribute__ ((constructor)); static void dyad_wrapper_fini (void) __attribute__ ((destructor)); #if DYAD_SYNC_DIR @@ -114,7 +114,7 @@ void dyad_wrapper_fini () dyad_finalize (&ctx); } -int open (const char *path, int oflag, ...) +DYAD_DLL_EXPORTED int open (const char *path, int oflag, ...) 
{ char *error = NULL; typedef int (*open_ptr_t) (const char *, int, mode_t, ...); @@ -161,7 +161,7 @@ real_call:; return (func_ptr (path, oflag, mode)); } -FILE *fopen (const char *path, const char *mode) +DYAD_DLL_EXPORTED FILE *fopen (const char *path, const char *mode) { char *error = NULL; typedef FILE *(*fopen_ptr_t) (const char *, const char *); @@ -199,7 +199,7 @@ FILE *fopen (const char *path, const char *mode) return (func_ptr (path, mode)); } -int close (int fd) +DYAD_DLL_EXPORTED int close (int fd) { bool to_sync = false; char *error = NULL; @@ -288,7 +288,7 @@ real_call:; // semicolon here to avoid the error return rc; } -int fclose (FILE *fp) +DYAD_DLL_EXPORTED int fclose (FILE *fp) { bool to_sync = false; char *error = NULL; diff --git a/src/wrapper/wrapper.sym b/src/wrapper/wrapper.sym deleted file mode 100644 index 87d3bc26..00000000 --- a/src/wrapper/wrapper.sym +++ /dev/null @@ -1,4 +0,0 @@ -open -fopen -close -fclose From 5b54ceaa402858e1d43823864c59e57946c4ad56 Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Thu, 27 Jul 2023 14:04:10 -0700 Subject: [PATCH 06/18] Fixes request waiting for UCX DTL --- src/dtl/ucx_dtl.c | 16 ++++++++++------ src/modules/dyad.c | 4 ++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/dtl/ucx_dtl.c b/src/dtl/ucx_dtl.c index 54bcb1ad..77bf25d1 100644 --- a/src/dtl/ucx_dtl.c +++ b/src/dtl/ucx_dtl.c @@ -77,12 +77,12 @@ static ucs_status_t dyad_ucx_request_wait(dyad_dtl_ucx_t *dtl_handle, // that minimize the size of the worker's event queue. // In other words, prior UCX calls should mean that this loop only runs // a couple of times at most. 
- while (request->completed != 1) - { + do { ucp_worker_progress(dtl_handle->ucx_worker); - } - // Get the final status of the communication operation - final_request_status = ucp_request_check_status(request); + // usleep(100); + // Get the final status of the communication operation + final_request_status = ucp_request_check_status(request); + } while (final_request_status == UCS_INPROGRESS); // Free and deallocate the request object ucp_request_free(request); return final_request_status; @@ -350,6 +350,10 @@ dyad_rc_t dyad_dtl_ucx_rpc_unpack (dyad_dtl_t* self, const flux_msg_t* msg, char FLUX_LOG_INFO (dtl_handle->h, "Decoding consumer UCP address using base64\n"); dtl_handle->addr_len = base64_decoded_length(enc_addr_len); dtl_handle->consumer_address = (ucp_address_t*) malloc(dtl_handle->addr_len); + if (dtl_handle->consumer_address == NULL) { + FLUX_LOG_ERR (dtl_handle->h, "Could not allocate memory for consumer address"); + return DYAD_RC_SYSFAIL; + } decoded_len = base64_decode_using_maps (&base64_maps_rfc4648, (char*)dtl_handle->consumer_address, dtl_handle->addr_len, enc_addr, enc_addr_len); @@ -692,4 +696,4 @@ dyad_rc_t dyad_dtl_ucx_finalize(dyad_dtl_t **self) rc = dyad_dtl_ucx_finalize_impl (&dtl_handle); (*self)->private.ucx_dtl_handle = NULL; return rc; -} \ No newline at end of file +} diff --git a/src/modules/dyad.c b/src/modules/dyad.c index 628401da..5a8db126 100644 --- a/src/modules/dyad.c +++ b/src/modules/dyad.c @@ -85,6 +85,10 @@ static dyad_mod_ctx_t *getctx (flux_t *h) if (!ctx) { ctx = (dyad_mod_ctx_t *) malloc (sizeof (*ctx)); + if (ctx == NULL) { + FLUX_LOG_ERR (h, "DYAD_MOD: could not allocate memory for context"); + goto getctx_error; + } ctx->h = h; ctx->debug = false; ctx->handlers = NULL; From 5eaa6ba17b5f169696be93a55b613ffe8742b417 Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Fri, 28 Jul 2023 17:57:29 -0700 Subject: [PATCH 07/18] Various changes from PR review The specific changes made in this commit are: 1. 
Ensures extern C is added everywhere it is needed 2. Adds update to CXXFLAGS to configure.ac 3. Removes sync_started 4. Moves dyad_kvs_response_t to dyad_core.c 5. Moves method assignment for dyad_dtl_t to underlying DTL implementations 6. Adds END variants to enums in DTL 7. Adds restrict everywhere I can --- configure.ac | 1 + src/core/dyad_core.c | 9 ++- src/core/dyad_core.h | 7 --- src/dtl/dyad_dtl.h | 1 + src/dtl/dyad_dtl_impl.c | 24 ++------ src/dtl/dyad_dtl_impl.h | 5 +- src/dtl/dyad_flux_log.h | 8 +++ src/dtl/dyad_rc.h | 8 +++ src/dtl/flux_dtl.c | 28 ++++++--- src/dtl/flux_dtl.h | 6 +- src/dtl/ucx_dtl.c | 115 +++++++++++++++++++----------------- src/dtl/ucx_dtl.h | 6 +- src/modules/Makefile.am | 3 +- src/modules/dyad_flux_log.h | 22 ------- src/utils/base64/base64.h | 13 +++- 15 files changed, 134 insertions(+), 122 deletions(-) delete mode 100644 src/modules/dyad_flux_log.h diff --git a/configure.ac b/configure.ac index 5e0e5bf7..371f3c95 100644 --- a/configure.ac +++ b/configure.ac @@ -186,6 +186,7 @@ AC_CHECK_FUNCS( \ ####################################### if test "x$enable_debug" = xyes; then CFLAGS="$CFLAGS -DDYAD_FULL_DEBUG=1 -DDYAD_LOGGING_ON=1" + CXXFLAGS="$CXXFLAGS -DDYAD_FULL_DEBUG=1 -DDYAD_LOGGING_ON=1" fi ######################## diff --git a/src/core/dyad_core.c b/src/core/dyad_core.c index f225a0c0..fa96f8b2 100644 --- a/src/core/dyad_core.c +++ b/src/core/dyad_core.c @@ -22,7 +22,6 @@ const struct dyad_ctx dyad_ctx_default = { false, // reenter true, // initialized false, // shared_storage - false, // sync_started 3u, // key_depth 1024u, // key_bins 0u, // rank @@ -31,6 +30,12 @@ const struct dyad_ctx dyad_ctx_default = { NULL // cons_managed_path }; +struct dyad_kvs_response { + char* fpath; + uint32_t owner_rank; +}; +typedef struct dyad_kvs_response dyad_kvs_response_t; + static int gen_path_key (const char* str, char* path_key, const size_t len, @@ -847,7 +852,7 @@ int dyad_finalize (dyad_ctx_t** ctx) #if DYAD_PERFFLOW 
__attribute__((annotate("@critical_path()"))) #endif -int dyad_sync_directory(dyad_ctx_t* ctx, const char* path) +int dyad_sync_directory(dyad_ctx_t* restrict ctx, const char* restrict path) { // Flush new directory entry https://lwn.net/Articles/457671/ char path_copy[PATH_MAX + 1]; int odir_fd = -1; diff --git a/src/core/dyad_core.h b/src/core/dyad_core.h index bc75ce08..2a43d18a 100644 --- a/src/core/dyad_core.h +++ b/src/core/dyad_core.h @@ -40,7 +40,6 @@ struct dyad_ctx { bool reenter; // if false, do not recursively enter DYAD bool initialized; // if true, DYAD is initialized bool shared_storage; // if true, the managed path is shared - bool sync_started; // TODO unsigned int key_depth; // Depth of bins for the Flux KVS unsigned int key_bins; // Number of bins for the Flux KVS uint32_t rank; // Flux rank for DYAD @@ -51,12 +50,6 @@ struct dyad_ctx { DYAD_DLL_EXPORTED extern const struct dyad_ctx dyad_ctx_default; typedef struct dyad_ctx dyad_ctx_t; -struct dyad_kvs_response { - char* fpath; - uint32_t owner_rank; -}; -typedef struct dyad_kvs_response dyad_kvs_response_t; - // Debug message #ifndef DPRINTF #define DPRINTF(curr_dyad_ctx, fmt, ...) 
\ diff --git a/src/dtl/dyad_dtl.h b/src/dtl/dyad_dtl.h index 0223536e..ceca3f9c 100644 --- a/src/dtl/dyad_dtl.h +++ b/src/dtl/dyad_dtl.h @@ -8,6 +8,7 @@ extern "C" { enum dyad_dtl_mode { DYAD_DTL_UCX = 0, DYAD_DTL_FLUX_RPC = 1, + DYAD_DTL_END = 2 }; typedef enum dyad_dtl_mode dyad_dtl_mode_t; diff --git a/src/dtl/dyad_dtl_impl.c b/src/dtl/dyad_dtl_impl.c index 6b394bfc..4f12b7b0 100644 --- a/src/dtl/dyad_dtl_impl.c +++ b/src/dtl/dyad_dtl_impl.c @@ -14,7 +14,7 @@ dyad_rc_t dyad_dtl_init(dyad_dtl_t **dtl_handle, (*dtl_handle)->mode = mode; if (mode == DYAD_DTL_UCX) { rc = dyad_dtl_ucx_init ( - &((*dtl_handle)->private.ucx_dtl_handle), + *dtl_handle, mode, h, debug @@ -22,18 +22,10 @@ dyad_rc_t dyad_dtl_init(dyad_dtl_t **dtl_handle, if (DYAD_IS_ERROR(rc)) { return rc; } - (*dtl_handle)->rpc_pack = dyad_dtl_ucx_rpc_pack; - (*dtl_handle)->rpc_unpack = dyad_dtl_ucx_rpc_unpack; - (*dtl_handle)->rpc_respond = dyad_dtl_ucx_rpc_respond; - (*dtl_handle)->rpc_recv_response = dyad_dtl_ucx_rpc_recv_response; - (*dtl_handle)->establish_connection = dyad_dtl_ucx_establish_connection; - (*dtl_handle)->send = dyad_dtl_ucx_send; - (*dtl_handle)->recv = dyad_dtl_ucx_recv; - (*dtl_handle)->close_connection = dyad_dtl_ucx_close_connection; return DYAD_RC_OK; } else if (mode == DYAD_DTL_FLUX_RPC) { rc = dyad_dtl_flux_init ( - &((*dtl_handle)->private.flux_dtl_handle), + *dtl_handle, mode, h, debug @@ -41,14 +33,6 @@ dyad_rc_t dyad_dtl_init(dyad_dtl_t **dtl_handle, if (DYAD_IS_ERROR(rc)) { return rc; } - (*dtl_handle)->rpc_pack = dyad_dtl_flux_rpc_pack; - (*dtl_handle)->rpc_unpack = dyad_dtl_flux_rpc_unpack; - (*dtl_handle)->rpc_respond = dyad_dtl_flux_rpc_respond; - (*dtl_handle)->rpc_recv_response = dyad_dtl_flux_rpc_recv_response; - (*dtl_handle)->establish_connection = dyad_dtl_flux_establish_connection; - (*dtl_handle)->send = dyad_dtl_flux_send; - (*dtl_handle)->recv = dyad_dtl_flux_recv; - (*dtl_handle)->close_connection = dyad_dtl_flux_close_connection; return DYAD_RC_OK; } return 
DYAD_RC_BADDTLMODE; @@ -58,6 +42,10 @@ dyad_rc_t dyad_dtl_finalize(dyad_dtl_t **dtl_handle) { dyad_rc_t rc = DYAD_RC_OK; if (dtl_handle == NULL || *dtl_handle == NULL) + // We should only reach this line if the user has passed + // in an already-finalized DTL handle. In that case, + // this function should be treated as a no-op, and we + // should return DYAD_RC_OK to indicate no error has occured return DYAD_RC_OK; if ((*dtl_handle)->mode == DYAD_DTL_UCX) { if ((*dtl_handle)->private.ucx_dtl_handle != NULL) { diff --git a/src/dtl/dyad_dtl_impl.h b/src/dtl/dyad_dtl_impl.h index 513a20e2..18bfcc9f 100644 --- a/src/dtl/dyad_dtl_impl.h +++ b/src/dtl/dyad_dtl_impl.h @@ -32,14 +32,15 @@ enum dyad_dtl_comm_mode { DYAD_COMM_NONE = 0, // Sanity check value for when connection isn't established DYAD_COMM_RECV = 1, // DTL connection will only receive data DYAD_COMM_SEND = 2, // DTL connection will only send data + DYAD_COMM_END = 3 }; typedef enum dyad_dtl_comm_mode dyad_dtl_comm_mode_t; struct dyad_dtl { dyad_dtl_private_t private; dyad_dtl_mode_t mode; - dyad_rc_t (*rpc_pack)(struct dyad_dtl* self, const char* upath, - uint32_t producer_rank, json_t** packed_obj); + dyad_rc_t (*rpc_pack)(struct dyad_dtl* restrict self, const char* restrict upath, + uint32_t producer_rank, json_t** restrict packed_obj); dyad_rc_t (*rpc_unpack)(struct dyad_dtl* self, const flux_msg_t* packed_obj, char** upath); dyad_rc_t (*rpc_respond)(struct dyad_dtl* self, const flux_msg_t* orig_msg); diff --git a/src/dtl/dyad_flux_log.h b/src/dtl/dyad_flux_log.h index 45dd9c26..1e77bf1b 100644 --- a/src/dtl/dyad_flux_log.h +++ b/src/dtl/dyad_flux_log.h @@ -3,6 +3,10 @@ #include +#ifdef __cplusplus +extern "C" { +#endif + #if !defined(DYAD_LOGGING_ON) || (DYAD_LOGGING_ON == 0) #define DYAD_LOG_INFO(dyad_ctx, ...) \ do { \ @@ -23,5 +27,9 @@ #define FLUX_LOG_INFO(flux_ctx, ...) flux_log (flux_ctx, LOG_INFO, __VA_ARGS__) #define FLUX_LOG_ERR(flux_ctx, ...) 
flux_log_error (flux_ctx, __VA_ARGS__) #endif + +#ifdef __cplusplus +} +#endif #endif /* DYAD_CORE_DYAD_FLUX_LOG_H */ diff --git a/src/dtl/dyad_rc.h b/src/dtl/dyad_rc.h index 0a028b2b..1ecf3166 100644 --- a/src/dtl/dyad_rc.h +++ b/src/dtl/dyad_rc.h @@ -7,6 +7,10 @@ #define DYAD_DLL_EXPORTED #endif +#ifdef __cplusplus +extern "C" { +#endif + enum dyad_core_return_codes { DYAD_RC_OK = 0, // Operation worked correctly DYAD_RC_SYSFAIL = -1, // Some sys call or C standard @@ -34,4 +38,8 @@ typedef enum dyad_core_return_codes dyad_rc_t; #define DYAD_IS_ERROR(code) ((code) < 0) +#ifdef __cplusplus +} +#endif + #endif // DYAD_CORE_DYAD_RC_H diff --git a/src/dtl/flux_dtl.c b/src/dtl/flux_dtl.c index 4e5064ab..ac945241 100644 --- a/src/dtl/flux_dtl.c +++ b/src/dtl/flux_dtl.c @@ -1,22 +1,32 @@ #include "flux_dtl.h" -dyad_rc_t dyad_dtl_flux_init (dyad_dtl_flux_t** dtl_handle, +dyad_rc_t dyad_dtl_flux_init (dyad_dtl_t* dtl_handle, dyad_dtl_mode_t mode, flux_t* h, bool debug) { - *dtl_handle = malloc(sizeof(struct dyad_dtl_flux)); - if (*dtl_handle == NULL) { + self->private.flux_dtl_handle = malloc(sizeof(struct dyad_dtl_flux)); + if (self->private.flux_dtl_handle == NULL) { FLUX_LOG_ERR (h, "Cannot allocate the Flux DTL handle\n"); return DYAD_RC_SYSFAIL; } - (*dtl_handle)->h = h; - (*dtl_handle)->debug = debug; - (*dtl_handle)->f = NULL; - (*dtl_handle)->msg = NULL; + self->private.flux_dtl_handle->h = h; + self->private.flux_dtl_handle->debug = debug; + self->private.flux_dtl_handle->f = NULL; + self->private.flux_dtl_handle->msg = NULL; + + self->rpc_pack = dyad_dtl_flux_rpc_pack; + self->rpc_unpack = dyad_dtl_flux_rpc_unpack; + self->rpc_respond = dyad_dtl_flux_rpc_respond; + self->rpc_recv_response = dyad_dtl_flux_rpc_recv_response; + self->establish_connection = dyad_dtl_flux_establish_connection; + self->send = dyad_dtl_flux_send; + self->recv = dyad_dtl_flux_recv; + self->close_connection = dyad_dtl_flux_close_connection; + return DYAD_RC_OK; } -dyad_rc_t 
dyad_dtl_flux_rpc_pack (dyad_dtl_t* self, const char* upath, - uint32_t producer_rank, json_t** packed_obj) +dyad_rc_t dyad_dtl_flux_rpc_pack (dyad_dtl_t* restrict self, const char* restrict upath, + uint32_t producer_rank, json_t** restrict packed_obj) { dyad_dtl_flux_t* dtl_handle = self->private.flux_dtl_handle; *packed_obj = json_pack( diff --git a/src/dtl/flux_dtl.h b/src/dtl/flux_dtl.h index 6af5665c..34dd14a6 100644 --- a/src/dtl/flux_dtl.h +++ b/src/dtl/flux_dtl.h @@ -14,11 +14,11 @@ struct dyad_dtl_flux { typedef struct dyad_dtl_flux dyad_dtl_flux_t; -dyad_rc_t dyad_dtl_flux_init (dyad_dtl_flux_t** dtl_handle, +dyad_rc_t dyad_dtl_flux_init (dyad_dtl_t* self, dyad_dtl_mode_t mode, flux_t* h, bool debug); -dyad_rc_t dyad_dtl_flux_rpc_pack (dyad_dtl_t* self, const char* upath, - uint32_t producer_rank, json_t** packed_obj); +dyad_rc_t dyad_dtl_flux_rpc_pack (dyad_dtl_t* restrict self, const char* restrict upath, + uint32_t producer_rank, json_t** restrict packed_obj); dyad_rc_t dyad_dtl_flux_rpc_unpack (dyad_dtl_t* self, const flux_msg_t* msg, char** upath); diff --git a/src/dtl/ucx_dtl.c b/src/dtl/ucx_dtl.c index 77bf25d1..e92a014a 100644 --- a/src/dtl/ucx_dtl.c +++ b/src/dtl/ucx_dtl.c @@ -102,33 +102,9 @@ static ucs_status_t dyad_ucx_request_wait(dyad_dtl_ucx_t *dtl_handle, static inline dyad_rc_t dyad_dtl_ucx_finalize_impl (dyad_dtl_ucx_t **dtl_handle) { - // Release consumer address if not already released - if ((*dtl_handle)->consumer_address != NULL) { - ucp_worker_release_address( - (*dtl_handle)->ucx_worker, - (*dtl_handle)->consumer_address - ); - (*dtl_handle)->consumer_address = NULL; - } - // Release worker if not already released - if ((*dtl_handle)->ucx_worker != NULL) { - ucp_worker_destroy((*dtl_handle)->ucx_worker); - (*dtl_handle)->ucx_worker = NULL; - } - // Release context if not already released - if ((*dtl_handle)->ucx_ctx != NULL) { - ucp_cleanup((*dtl_handle)->ucx_ctx); - (*dtl_handle)->ucx_ctx = NULL; - } - // Flux handle should be 
released by the - // DYAD context, so it is not released here - (*dtl_handle)->h = NULL; - // Free the handle and set to NULL to prevent double free - free(*dtl_handle); - return DYAD_RC_OK; } -dyad_rc_t dyad_dtl_ucx_init(dyad_dtl_ucx_t** dtl_handle, dyad_dtl_mode_t mode, +dyad_rc_t dyad_dtl_ucx_init(dyad_dtl_t* self, dyad_dtl_mode_t mode, flux_t *h, bool debug) { ucp_params_t ucx_params; @@ -136,29 +112,31 @@ dyad_rc_t dyad_dtl_ucx_init(dyad_dtl_ucx_t** dtl_handle, dyad_dtl_mode_t mode, ucp_config_t *config; ucs_status_t status; ucp_worker_attr_t worker_attrs; + dyad_dtl_ucx_t* dtl_handle = NULL; - *dtl_handle = malloc(sizeof(struct dyad_dtl_ucx)); - if (*dtl_handle == NULL) { + self->private.ucx_dtl_handle = malloc(sizeof(struct dyad_dtl_ucx)); + if (self->private.ucx_dtl_handle == NULL) { FLUX_LOG_ERR (h, "Could not allocate UCX DTL context\n"); return DYAD_RC_SYSFAIL; } + dtl_handle = self->private.ucx_dtl_handle; // Allocation/Freeing of the Flux handle should be // handled by the DYAD context - (*dtl_handle)->h = h; - (*dtl_handle)->debug = debug; - (*dtl_handle)->ucx_ctx = NULL; - (*dtl_handle)->ucx_worker = NULL; - (*dtl_handle)->ep = NULL; - (*dtl_handle)->curr_comm_mode = DYAD_COMM_NONE; - (*dtl_handle)->consumer_address = NULL; - (*dtl_handle)->addr_len = 0; - (*dtl_handle)->comm_tag = 0; + dtl_handle->h = h; + dtl_handle->debug = debug; + dtl_handle->ucx_ctx = NULL; + dtl_handle->ucx_worker = NULL; + dtl_handle->ep = NULL; + dtl_handle->curr_comm_mode = DYAD_COMM_NONE; + dtl_handle->consumer_address = NULL; + dtl_handle->addr_len = 0; + dtl_handle->comm_tag = 0; // Read the UCX configuration - FLUX_LOG_INFO ((*dtl_handle)->h, "Reading UCP config\n"); + FLUX_LOG_INFO (dtl_handle->h, "Reading UCP config\n"); status = ucp_config_read (NULL, NULL, &config); if (UCX_STATUS_FAIL(status)) { - FLUX_LOG_ERR ((*dtl_handle)->h, "Could not read the UCX config\n"); + FLUX_LOG_ERR (dtl_handle->h, "Could not read the UCX config\n"); goto error; } @@ -181,8 +159,8 @@ 
dyad_rc_t dyad_dtl_ucx_init(dyad_dtl_ucx_t** dtl_handle, dyad_dtl_mode_t mode, ucx_params.request_init = dyad_ucx_request_init; // Initialize UCX - FLUX_LOG_INFO ((*dtl_handle)->h, "Initializing UCP\n"); - status = ucp_init(&ucx_params, config, &(*dtl_handle)->ucx_ctx); + FLUX_LOG_INFO (dtl_handle->h, "Initializing UCP\n"); + status = ucp_init(&ucx_params, config, &dtl_handle->ucx_ctx); // If in debug mode, print the configuration of UCX to stderr if (debug) { @@ -213,42 +191,51 @@ dyad_rc_t dyad_dtl_ucx_init(dyad_dtl_ucx_t** dtl_handle, dyad_dtl_mode_t mode, worker_params.events = UCP_WAKEUP_TAG_RECV; // Create the worker and log an error if that fails - FLUX_LOG_INFO ((*dtl_handle)->h, "Creating UCP worker\n"); + FLUX_LOG_INFO (dtl_handle->h, "Creating UCP worker\n"); status = ucp_worker_create( - (*dtl_handle)->ucx_ctx, + dtl_handle->ucx_ctx, &worker_params, - &(*dtl_handle)->ucx_worker + &(dtl_handle->ucx_worker) ); if (UCX_STATUS_FAIL(status)) { - FLUX_LOG_ERR (h, "ucp_worker_create failed (status = %d)!\n", status); + FLUX_LOG_ERR (dtl_handle->h, "ucp_worker_create failed (status = %d)!\n", status); goto error; } // Query the worker for its address worker_attrs.field_mask = UCP_WORKER_ATTR_FIELD_ADDRESS; - FLUX_LOG_INFO ((*dtl_handle)->h, "Get address of UCP worker\n"); + FLUX_LOG_INFO (dtl_handle->h, "Get address of UCP worker\n"); status = ucp_worker_query( - (*dtl_handle)->ucx_worker, + dtl_handle->ucx_worker, &worker_attrs ); if (UCX_STATUS_FAIL(status)) { FLUX_LOG_ERR (h, "Cannot get UCX worker address (status = %d)!\n", status); goto error; } - (*dtl_handle)->consumer_address = worker_attrs.address; - (*dtl_handle)->addr_len = worker_attrs.address_length; + dtl_handle->consumer_address = worker_attrs.address; + dtl_handle->addr_len = worker_attrs.address_length; + + self->rpc_pack = dyad_dtl_ucx_rpc_pack; + self->rpc_unpack = dyad_dtl_ucx_rpc_unpack; + self->rpc_respond = dyad_dtl_ucx_rpc_respond; + self->rpc_recv_response = 
dyad_dtl_ucx_rpc_recv_response; + self->establish_connection = dyad_dtl_ucx_establish_connection; + self->send = dyad_dtl_ucx_send; + self->recv = dyad_dtl_ucx_recv; + self->close_connection = dyad_dtl_ucx_close_connection; return DYAD_RC_OK; error:; // If an error occured, finalize the DTL handle and // return a failing error code - dyad_dtl_ucx_finalize_impl(dtl_handle); + dyad_dtl_ucx_finalize (&self); return DYAD_RC_UCXINIT_FAIL; } -dyad_rc_t dyad_dtl_ucx_rpc_pack(dyad_dtl_t *self, const char *upath, - uint32_t producer_rank, json_t **packed_obj) +dyad_rc_t dyad_dtl_ucx_rpc_pack(dyad_dtl_t* restrict self, const char* restrict upath, + uint32_t producer_rank, json_t** restrict packed_obj) { size_t enc_len = 0; char* enc_buf = NULL; @@ -693,7 +680,29 @@ dyad_rc_t dyad_dtl_ucx_finalize(dyad_dtl_t **self) dyad_dtl_ucx_close_connection (*self); dtl_handle->ep = NULL; } - rc = dyad_dtl_ucx_finalize_impl (&dtl_handle); + // Release consumer address if not already released + if (dtl_handle->consumer_address != NULL) { + ucp_worker_release_address( + dtl_handle->ucx_worker, + dtl_handle->consumer_address + ); + dtl_handle->consumer_address = NULL; + } + // Release worker if not already released + if (dtl_handle->ucx_worker != NULL) { + ucp_worker_destroy(dtl_handle->ucx_worker); + dtl_handle->ucx_worker = NULL; + } + // Release context if not already released + if (dtl_handle->ucx_ctx != NULL) { + ucp_cleanup(dtl_handle->ucx_ctx); + dtl_handle->ucx_ctx = NULL; + } + // Flux handle should be released by the + // DYAD context, so it is not released here + dtl_handle->h = NULL; + // Free the handle and set to NULL to prevent double free + free(dtl_handle); (*self)->private.ucx_dtl_handle = NULL; - return rc; + return DYAD_RC_OK; } diff --git a/src/dtl/ucx_dtl.h b/src/dtl/ucx_dtl.h index 9b92e5b6..658db384 100644 --- a/src/dtl/ucx_dtl.h +++ b/src/dtl/ucx_dtl.h @@ -21,11 +21,11 @@ struct dyad_dtl_ucx { typedef struct dyad_dtl_ucx dyad_dtl_ucx_t; -dyad_rc_t dyad_dtl_ucx_init 
(dyad_dtl_ucx_t **dtl_handle, +dyad_rc_t dyad_dtl_ucx_init (dyad_dtl_t *self, dyad_dtl_mode_t mode, flux_t *h, bool debug); -dyad_rc_t dyad_dtl_ucx_rpc_pack (dyad_dtl_t* self, const char* upath, - uint32_t producer_rank, json_t** packed_obj); +dyad_rc_t dyad_dtl_ucx_rpc_pack (dyad_dtl_t* restrict self, const char* restrict upath, + uint32_t producer_rank, json_t** restrict packed_obj); dyad_rc_t dyad_dtl_ucx_rpc_unpack (dyad_dtl_t* self, const flux_msg_t* msg, char** upath); diff --git a/src/modules/Makefile.am b/src/modules/Makefile.am index d0a8ee27..56c0e3e8 100644 --- a/src/modules/Makefile.am +++ b/src/modules/Makefile.am @@ -1,7 +1,6 @@ lib_LTLIBRARIES = dyad.la dyad_la_SOURCES = \ - dyad.c \ - dyad_flux_log.h + dyad.c dyad_la_LDFLAGS = \ $(AM_LDFLAGS) \ -module \ diff --git a/src/modules/dyad_flux_log.h b/src/modules/dyad_flux_log.h deleted file mode 100644 index a8a24268..00000000 --- a/src/modules/dyad_flux_log.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef __DYAD_MOD_DYAD_FLUX_LOG_H__ -#define __DYAD_MOD_DYAD_FLUX_LOG_H__ - -#include - -#if !defined(DYAD_LOGGING_ON) || (DYAD_LOGGING_ON == 0) -#define DYAD_LOG_INFO(dyad_ctx, ...) do {} while (0) -#define DYAD_LOG_ERR(dyad_ctx, ...) do {} while (0) -#define FLUX_LOG_INFO(flux_ctx, ...) do {} while (0) -#define FLUX_LOG_ERR(flux_ctx, ...) do {} while (0) -#else -#define DYAD_LOG_INFO(dyad_ctx, ...) flux_log (\ - dyad_ctx->h, LOG_INFO, __VA_ARGS__) -#define DYAD_LOG_ERR(dyad_ctx, ...) flux_log_error (\ - dyad_ctx->h, __VA_ARGS__) -#define FLUX_LOG_INFO(flux_ctx, ...) flux_log (\ - flux_ctx, LOG_INFO, __VA_ARGS__) -#define FLUX_LOG_ERR(flux_ctx, ...) 
flux_log_error (\ - flux_ctx, __VA_ARGS__) -#endif - -#endif /* __DYAD_MOD_DYAD_FLUX_LOG_H__ */ diff --git a/src/utils/base64/base64.h b/src/utils/base64/base64.h index a899af4a..e04fd938 100644 --- a/src/utils/base64/base64.h +++ b/src/utils/base64/base64.h @@ -2,10 +2,19 @@ #ifndef CCAN_BASE64_H #define CCAN_BASE64_H +#ifdef __cplusplus +#include +#else #include #include +#endif + #include +#ifdef __cplusplus +extern "C" { +#endif + /** * base64_maps_t - structure to hold maps for encode/decode */ @@ -236,6 +245,8 @@ ssize_t base64_decode_tail(char dest[3], const char *src, size_t srclen) /* end rfc4648 functions */ - +#ifdef __cplusplus +} +#endif #endif /* CCAN_BASE64_H */ From 620032bdf53e439b3df3d708d0d8d0230db1de03 Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Fri, 28 Jul 2023 19:06:09 -0700 Subject: [PATCH 08/18] Updates .clang-format to match flux-core and applies formatting to all code --- .clang-format | 4 +- src/core/dyad_core.c | 269 +++++++++--------- src/core/dyad_core.h | 54 ++-- src/dtl/dyad_dtl.h | 8 +- src/dtl/dyad_dtl_impl.c | 32 +-- src/dtl/dyad_dtl_impl.h | 45 +-- src/dtl/dyad_flux_log.h | 5 +- src/dtl/dyad_rc.h | 42 +-- src/dtl/flux_dtl.c | 80 +++--- src/dtl/flux_dtl.h | 21 +- src/dtl/ucx_dtl.c | 476 ++++++++++++++++---------------- src/dtl/ucx_dtl.h | 30 +- src/modules/dyad.c | 124 ++++----- src/modules/urpc.c | 23 +- src/stream/dyad_stream_api.hpp | 154 ++++------- src/stream/dyad_stream_core.cpp | 42 +-- src/urpc/urpc_client.c | 5 +- src/urpc/urpc_client.h | 2 +- src/utils/read_all.h | 5 +- src/utils/utils.c | 51 ++-- src/utils/utils.h | 2 +- src/wrapper/wrapper.c | 45 ++- 22 files changed, 726 insertions(+), 793 deletions(-) diff --git a/.clang-format b/.clang-format index e26bf0be..5ec78a2f 100644 --- a/.clang-format +++ b/.clang-format @@ -10,7 +10,7 @@ AllowShortLoopsOnASingleLine : false BinPackParameters : false AllowAllParametersOfDeclarationOnNextLine : false AlignTrailingComments : true -ColumnLimit : 80 +ColumnLimit : 88 # do 
not put all arguments on one line unless it's the same line as the call PenaltyBreakBeforeFirstCallParameter : 10000000 @@ -20,7 +20,7 @@ PenaltyBreakString : 10 # These improve formatting results but require clang 3.6/7 or higher BreakBeforeBinaryOperators : NonAssignment AlignAfterOpenBracket: true -BinPackArguments : true +BinPackArguments : false AlignOperands : true BreakBeforeTernaryOperators : true AllowAllParametersOfDeclarationOnNextLine : false diff --git a/src/core/dyad_core.c b/src/core/dyad_core.c index fa96f8b2..e31f516d 100644 --- a/src/core/dyad_core.c +++ b/src/core/dyad_core.c @@ -1,11 +1,12 @@ #include "dyad_core.h" -#include "murmur3.h" -#include "utils.h" -#include "dyad_dtl_impl.h" #include #include +#include "dyad_dtl_impl.h" +#include "murmur3.h" +#include "utils.h" + #ifdef __cplusplus #include #include @@ -42,9 +43,16 @@ static int gen_path_key (const char* str, const uint32_t depth, const uint32_t width) { - static const uint32_t seeds[10] = {104677u, 104681u, 104683u, 104693u, - 104701u, 104707u, 104711u, 104717u, - 104723u, 104729u}; + static const uint32_t seeds[10] = {104677u, + 104681u, + 104683u, + 104693u, + 104701u, + 104707u, + 104711u, + 104717u, + 104723u, + 104729u}; uint32_t seed = 57u; uint32_t hash[4] = {0u}; // Output for the hash @@ -75,12 +83,11 @@ static int gen_path_key (const char* str, } #if DYAD_PERFFLOW -__attribute__ ((annotate ("@critical_path()"))) -static dyad_rc_t dyad_kvs_commit (const dyad_ctx_t* ctx, - flux_kvs_txn_t* txn) +__attribute__ ((annotate ("@critical_path()"))) static dyad_rc_t dyad_kvs_commit ( + const dyad_ctx_t* ctx, + flux_kvs_txn_t* txn) #else -static inline dyad_rc_t dyad_kvs_commit (const dyad_ctx_t* ctx, - flux_kvs_txn_t* txn) +static inline dyad_rc_t dyad_kvs_commit (const dyad_ctx_t* ctx, flux_kvs_txn_t* txn) #endif { flux_future_t* f = NULL; @@ -101,9 +108,9 @@ static inline dyad_rc_t dyad_kvs_commit (const dyad_ctx_t* ctx, } #if DYAD_PERFFLOW -__attribute__ ((annotate 
("@critical_path()"))) -static dyad_rc_t publish_via_flux (const dyad_ctx_t* restrict ctx, - const char* restrict upath) +__attribute__ ((annotate ("@critical_path()"))) static dyad_rc_t publish_via_flux ( + const dyad_ctx_t* restrict ctx, + const char* restrict upath) #else static inline dyad_rc_t publish_via_flux (const dyad_ctx_t* restrict ctx, const char* restrict upath) @@ -151,9 +158,9 @@ publish_done:; } #if DYAD_PERFFLOW -__attribute__ ((annotate ("@critical_path()"))) -static dyad_rc_t dyad_commit (const dyad_ctx_t* restrict ctx, - const char* restrict fname) +__attribute__ ((annotate ("@critical_path()"))) static dyad_rc_t dyad_commit ( + const dyad_ctx_t* restrict ctx, + const char* restrict fname) #else static inline dyad_rc_t dyad_commit (dyad_ctx_t* restrict ctx, const char* restrict fname) @@ -165,10 +172,8 @@ static inline dyad_rc_t dyad_commit (dyad_ctx_t* restrict ctx, // Extract the path to the file specified by fname relative to the // producer-managed path // This relative path will be stored in upath - if (!cmp_canonical_path_prefix (ctx->prod_managed_path, fname, upath, - PATH_MAX)) { - DYAD_LOG_INFO (ctx, "%s is not in the Producer's managed path\n", - fname); + if (!cmp_canonical_path_prefix (ctx->prod_managed_path, fname, upath, PATH_MAX)) { + DYAD_LOG_INFO (ctx, "%s is not in the Producer's managed path\n", fname); rc = DYAD_RC_OK; goto commit_done; } @@ -193,11 +198,11 @@ commit_done:; } #if DYAD_PERFFLOW -__attribute__ ((annotate ("@critical_path()"))) -static dyad_rc_t dyad_kvs_lookup (const dyad_ctx_t* ctx, - const char* restrict kvs_topic, - uint32_t* owner_rank, - flux_future_t** f) +__attribute__ ((annotate ("@critical_path()"))) static dyad_rc_t dyad_kvs_lookup ( + const dyad_ctx_t* ctx, + const char* restrict kvs_topic, + uint32_t* owner_rank, + flux_future_t** f) #else static inline dyad_rc_t dyad_kvs_lookup (const dyad_ctx_t* ctx, const char* restrict kvs_topic, @@ -209,10 +214,10 @@ static inline dyad_rc_t dyad_kvs_lookup 
(const dyad_ctx_t* ctx, // Lookup information about the desired file (represented by kvs_topic) // from the Flux KVS. If there is no information, wait for it to be // made available - DYAD_LOG_INFO (ctx, "Retrieving information from KVS under the key %s\n", + DYAD_LOG_INFO (ctx, + "Retrieving information from KVS under the key %s\n", kvs_topic); - *f = flux_kvs_lookup (ctx->h, ctx->kvs_namespace, FLUX_KVS_WAITCREATE, - kvs_topic); + *f = flux_kvs_lookup (ctx->h, ctx->kvs_namespace, FLUX_KVS_WAITCREATE, kvs_topic); // If the KVS lookup failed, log an error and return DYAD_BADLOOKUP if (*f == NULL) { DYAD_LOG_ERR (ctx, "KVS lookup failed!\n"); @@ -230,10 +235,10 @@ static inline dyad_rc_t dyad_kvs_lookup (const dyad_ctx_t* ctx, } #if DYAD_PERFFLOW -__attribute__ ((annotate ("@critical_path()"))) -static dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, - const char* restrict fname, - dyad_kvs_response_t** restrict resp) +__attribute__ ((annotate ("@critical_path()"))) static dyad_rc_t dyad_fetch ( + const dyad_ctx_t* restrict ctx, + const char* restrict fname, + dyad_kvs_response_t** restrict resp) #else static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, const char* restrict fname, @@ -251,10 +256,8 @@ static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, // Extract the path to the file specified by fname relative to the // consumer-managed path // This relative path will be stored in upath - if (!cmp_canonical_path_prefix (ctx->cons_managed_path, fname, upath, - PATH_MAX)) { - DYAD_LOG_INFO (ctx, "%s is not in the Consumer's managed path\n", - fname); + if (!cmp_canonical_path_prefix (ctx->cons_managed_path, fname, upath, PATH_MAX)) { + DYAD_LOG_INFO (ctx, "%s is not in the Consumer's managed path\n", fname); return DYAD_RC_OK; } DYAD_LOG_INFO (ctx, @@ -291,8 +294,7 @@ static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, // Allocate and populate the dyad_kvs_response_t object. 
// If an error occurs, log it and return the appropriate // return code - DYAD_LOG_INFO (ctx, - "Creating KVS response object to store retrieved data\n"); + DYAD_LOG_INFO (ctx, "Creating KVS response object to store retrieved data\n"); *resp = malloc (sizeof (struct dyad_kvs_response)); if (*resp == NULL) { DYAD_LOG_ERR (ctx, "Cannot allocate a dyad_kvs_response_t object!\n"); @@ -300,10 +302,11 @@ static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, goto fetch_done; } (*resp)->fpath = malloc (strlen (upath) + 1); - if ((*resp)->fpath == NULL) - { - DYAD_LOG_ERR (ctx, "Cannot allocate a buffer for the file path in the dyad_kvs_response_t object\n"); - free(*resp); + if ((*resp)->fpath == NULL) { + DYAD_LOG_ERR (ctx, + "Cannot allocate a buffer for the file path in the " + "dyad_kvs_response_t object\n"); + free (*resp); rc = DYAD_RC_BADRESPONSE; goto fetch_done; } @@ -320,82 +323,65 @@ fetch_done:; } #if DYAD_PERFFLOW -__attribute__ ((annotate ("@critical_path()"))) -static dyad_rc_t dyad_get_data ( +__attribute__ ((annotate ("@critical_path()"))) static dyad_rc_t dyad_get_data ( const dyad_ctx_t* ctx, const dyad_kvs_response_t* restrict kvs_data, const char** file_data, size_t* file_len) #else -static inline dyad_rc_t dyad_get_data ( - const dyad_ctx_t* ctx, - const dyad_kvs_response_t* restrict kvs_data, - const char** file_data, - size_t* file_len) +static inline dyad_rc_t dyad_get_data (const dyad_ctx_t* ctx, + const dyad_kvs_response_t* restrict kvs_data, + const char** file_data, + size_t* file_len) #endif { dyad_rc_t rc = DYAD_RC_OK; dyad_rc_t final_rc = DYAD_RC_OK; - flux_future_t *f; + flux_future_t* f; json_t* rpc_payload; DYAD_LOG_INFO (ctx, "Packing payload for RPC to DYAD module"); - rc = ctx->dtl_handle->rpc_pack ( - ctx->dtl_handle, - kvs_data->fpath, - kvs_data->owner_rank, - &rpc_payload - ); - if (DYAD_IS_ERROR(rc)) - { - DYAD_LOG_ERR(ctx, "Cannot create JSON payload for Flux RPC to DYAD module\n"); + rc = ctx->dtl_handle->rpc_pack 
(ctx->dtl_handle, + kvs_data->fpath, + kvs_data->owner_rank, + &rpc_payload); + if (DYAD_IS_ERROR (rc)) { + DYAD_LOG_ERR (ctx, + "Cannot create JSON payload for Flux RPC to DYAD " + "module\n"); goto get_done; } DYAD_LOG_INFO (ctx, "Sending payload for RPC to DYAD module"); - f = flux_rpc_pack ( - ctx->h, - "dyad.fetch", - kvs_data->owner_rank, - FLUX_RPC_STREAMING, - "o", - rpc_payload - ); - if (f == NULL) - { - DYAD_LOG_ERR(ctx, "Cannot send RPC to producer module\n"); + f = flux_rpc_pack (ctx->h, + "dyad.fetch", + kvs_data->owner_rank, + FLUX_RPC_STREAMING, + "o", + rpc_payload); + if (f == NULL) { + DYAD_LOG_ERR (ctx, "Cannot send RPC to producer module\n"); rc = DYAD_RC_BADRPC; goto get_done; } DYAD_LOG_INFO (ctx, "Receive RPC response from DYAD module"); - rc = ctx->dtl_handle->rpc_recv_response ( - ctx->dtl_handle, - f - ); - if (DYAD_IS_ERROR(rc)) - { - DYAD_LOG_ERR(ctx, "Cannot receive and/or parse the RPC response\n"); + rc = ctx->dtl_handle->rpc_recv_response (ctx->dtl_handle, f); + if (DYAD_IS_ERROR (rc)) { + DYAD_LOG_ERR (ctx, "Cannot receive and/or parse the RPC response\n"); goto get_done; } DYAD_LOG_INFO (ctx, "Establish DTL connection with DYAD module"); - rc = ctx->dtl_handle->establish_connection ( - ctx->dtl_handle, - DYAD_COMM_RECV - ); - if (DYAD_IS_ERROR(rc)) { - DYAD_LOG_ERR (ctx, "Cannot establish connection with DYAD module on broker %u\n", kvs_data->owner_rank); + rc = ctx->dtl_handle->establish_connection (ctx->dtl_handle, DYAD_COMM_RECV); + if (DYAD_IS_ERROR (rc)) { + DYAD_LOG_ERR (ctx, + "Cannot establish connection with DYAD module on broker " + "%u\n", + kvs_data->owner_rank); goto get_done; } DYAD_LOG_INFO (ctx, "Receive file data via DTL"); - rc = ctx->dtl_handle->recv ( - ctx->dtl_handle, - (void**) file_data, - file_len - ); + rc = ctx->dtl_handle->recv (ctx->dtl_handle, (void**)file_data, file_len); DYAD_LOG_INFO (ctx, "Close DTL connection with DYAD module"); - ctx->dtl_handle->close_connection ( - ctx->dtl_handle - ); - if 
(DYAD_IS_ERROR(rc)) - { + ctx->dtl_handle->close_connection (ctx->dtl_handle); + if (DYAD_IS_ERROR (rc)) { DYAD_LOG_ERR (ctx, "Cannot receive data from producer module\n"); goto get_done; } @@ -403,18 +389,24 @@ static inline dyad_rc_t dyad_get_data ( rc = DYAD_RC_OK; get_done:; - // There are two return codes that have special meaning when coming from the DTL: + // There are two return codes that have special meaning when coming from the + // DTL: // * DYAD_RC_RPC_FINISHED: occurs when an ENODATA error occurs // * DYAD_RC_BADRPC: occurs when a previous RPC operation fails - // In either of these cases, we do not need to wait for the end of stream because - // the RPC is already completely messed up. - // If we do not have either of these cases, we will wait for one more RPC message. - // If everything went well in the module, this last message will set errno to ENODATA (i.e., end of stream). - // Otherwise, something went wrong, so we'll return DYAD_RC_BADRPC. + // In either of these cases, we do not need to wait for the end of stream + // because the RPC is already completely messed up. If we do not have either + // of these cases, we will wait for one more RPC message. If everything went + // well in the module, this last message will set errno to ENODATA (i.e., + // end of stream). Otherwise, something went wrong, so we'll return + // DYAD_RC_BADRPC. DYAD_LOG_INFO (ctx, "Wait for end-of-stream message from module\n"); if (rc != DYAD_RC_RPC_FINISHED && rc != DYAD_RC_BADRPC) { if (!(flux_rpc_get (f, NULL) < 0 && errno == ENODATA)) { - DYAD_LOG_ERR (ctx, "An error occured at end of getting data! Either the module sent too many responses, or the module failed with a bad error (errno = %d)\n", errno); + DYAD_LOG_ERR (ctx, + "An error occured at end of getting data! 
Either the " + "module sent too many responses, or the module " + "failed with a bad error (errno = %d)\n", + errno); rc = DYAD_RC_BADRPC; } } @@ -424,9 +416,9 @@ get_done:; } #if DYAD_PERFFLOW -__attribute__ ((annotate ("@critical_path()"))) -static dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, - const dyad_kvs_response_t* restrict kvs_data) +__attribute__ ((annotate ("@critical_path()"))) static dyad_rc_t dyad_pull ( + const dyad_ctx_t* restrict ctx, + const dyad_kvs_response_t* restrict kvs_data) #else static inline dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, const dyad_kvs_response_t* restrict kvs_data) @@ -447,7 +439,7 @@ static inline dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, // Call dyad_get_data to dispatch a RPC to the producer's Flux broker // and retrieve the data associated with the file rc = dyad_get_data (ctx, kvs_data, &file_data, &file_len); - if (DYAD_IS_ERROR(rc)) { + if (DYAD_IS_ERROR (rc)) { goto pull_done; } @@ -460,8 +452,7 @@ static inline dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, // Create the directory as needed // TODO: Need to be consistent with the mode at the source odir = dirname (file_path_copy); - if ((strncmp (odir, ".", strlen (".")) != 0) - && (mkdir_as_needed (odir, m) < 0)) { + if ((strncmp (odir, ".", strlen (".")) != 0) && (mkdir_as_needed (odir, m) < 0)) { DYAD_LOG_ERR (ctx, "Cannot create needed directories for pulled " "file\n"); @@ -492,7 +483,7 @@ static inline dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, pull_done: if (file_data != NULL) { - free(file_data); + free (file_data); } // If "check" is set and the operation was successful, set the // DYAD_CHECK_ENV environment variable to "ok" @@ -584,8 +575,7 @@ dyad_rc_t dyad_init (bool debug, const size_t namespace_len = strlen (kvs_namespace); (*ctx)->kvs_namespace = (char*)malloc (namespace_len + 1); if ((*ctx)->kvs_namespace == NULL) { - FLUX_LOG_ERR ((*ctx)->h, - "Could not allocate buffer for KVS namespace!\n"); + 
FLUX_LOG_ERR ((*ctx)->h, "Could not allocate buffer for KVS namespace!\n"); free (*ctx); *ctx = NULL; return DYAD_RC_NOCTX; @@ -594,14 +584,8 @@ dyad_rc_t dyad_init (bool debug, // Initialize the DTL based on the value of dtl_mode // If an error occurs, log it and return an error FLUX_LOG_INFO ((*ctx)->h, "DYAD_CORE: inintializing DYAD DTL"); - rc = dyad_dtl_init( - &(*ctx)->dtl_handle, - dtl_mode, - (*ctx)->h, - (*ctx)->debug - ); - if (DYAD_IS_ERROR(rc)) - { + rc = dyad_dtl_init (&(*ctx)->dtl_handle, dtl_mode, (*ctx)->h, (*ctx)->debug); + if (DYAD_IS_ERROR (rc)) { FLUX_LOG_ERR ((*ctx)->h, "Cannot initialize the DTL\n"); return rc; } @@ -622,8 +606,7 @@ dyad_rc_t dyad_init (bool debug, *ctx = NULL; return DYAD_RC_NOCTX; } - strncpy ((*ctx)->prod_managed_path, prod_managed_path, - prod_path_len + 1); + strncpy ((*ctx)->prod_managed_path, prod_managed_path, prod_path_len + 1); } // If the consumer-managed path is provided, copy it into // the dyad_ctx_t object @@ -643,8 +626,7 @@ dyad_rc_t dyad_init (bool debug, *ctx = NULL; return DYAD_RC_NOCTX; } - strncpy ((*ctx)->cons_managed_path, cons_managed_path, - cons_path_len + 1); + strncpy ((*ctx)->cons_managed_path, cons_managed_path, cons_path_len + 1); } // Initialization is now complete! // Set reenter and initialized to indicate this. 
@@ -654,17 +636,17 @@ dyad_rc_t dyad_init (bool debug, return DYAD_RC_OK; } -dyad_rc_t dyad_init_env (dyad_ctx_t **ctx) +dyad_rc_t dyad_init_env (dyad_ctx_t** ctx) { - char *e = NULL; + char* e = NULL; bool debug = false; bool check = false; bool shared_storage = false; unsigned int key_depth = 0; unsigned int key_bins = 0; - char *kvs_namespace = NULL; - char *prod_managed_path = NULL; - char *cons_managed_path = NULL; + char* kvs_namespace = NULL; + char* prod_managed_path = NULL; + char* cons_managed_path = NULL; size_t dtl_mode_env_len = 0; dyad_dtl_mode_t dtl_mode = DYAD_DTL_UCX; @@ -677,7 +659,9 @@ dyad_rc_t dyad_init_env (dyad_ctx_t **ctx) } if (debug) - fprintf (stderr, "DYAD_CORE: Initializing with environment variables\n"); + fprintf (stderr, + "DYAD_CORE: Initializing with environment " + "variables\n"); if ((e = getenv (DYAD_SYNC_CHECK_ENV))) { check = true; @@ -729,8 +713,10 @@ dyad_rc_t dyad_init_env (dyad_ctx_t **ctx) dtl_mode = DYAD_DTL_UCX; } else { if (debug) { - fprintf (stderr, "Invalid DTL mode provided through %s. \ - Defaulting to UCX\n", DYAD_DTL_MODE_ENV); + fprintf (stderr, + "Invalid DTL mode provided through %s. \ + Defaulting to UCX\n", + DYAD_DTL_MODE_ENV); } dtl_mode = DYAD_DTL_UCX; } @@ -738,10 +724,19 @@ dyad_rc_t dyad_init_env (dyad_ctx_t **ctx) dtl_mode = DYAD_DTL_UCX; } if (debug) - fprintf (stderr, "DYAD_CORE: retrieved configuration from environment. Now initializing DYAD\n"); - return dyad_init (debug, check, shared_storage, key_depth, key_bins, - kvs_namespace, prod_managed_path, cons_managed_path, - dtl_mode, ctx); + fprintf (stderr, + "DYAD_CORE: retrieved configuration from environment. 
Now " + "initializing DYAD\n"); + return dyad_init (debug, + check, + shared_storage, + key_depth, + key_bins, + kvs_namespace, + prod_managed_path, + cons_managed_path, + dtl_mode, + ctx); } dyad_rc_t dyad_produce (dyad_ctx_t* ctx, const char* fname) @@ -753,8 +748,7 @@ dyad_rc_t dyad_produce (dyad_ctx_t* ctx, const char* fname) } // If the producer-managed path is NULL or empty, then the context is not // valid for a producer operation. So, return DYAD_BADMANAGEDPATH - if (ctx->prod_managed_path == NULL - || strlen (ctx->prod_managed_path) == 0) { + if (ctx->prod_managed_path == NULL || strlen (ctx->prod_managed_path) == 0) { return DYAD_RC_BADMANAGEDPATH; } // If the context is valid, call dyad_commit to perform @@ -772,8 +766,7 @@ dyad_rc_t dyad_consume (dyad_ctx_t* ctx, const char* fname) } // If the consumer-managed path is NULL or empty, then the context is not // valid for a consumer operation. So, return DYAD_BADMANAGEDPATH - if (ctx->cons_managed_path == NULL - || strlen (ctx->cons_managed_path) == 0) { + if (ctx->cons_managed_path == NULL || strlen (ctx->cons_managed_path) == 0) { return DYAD_RC_BADMANAGEDPATH; } // Set reenter to false to avoid recursively performing diff --git a/src/core/dyad_core.h b/src/core/dyad_core.h index 2a43d18a..47cd1961 100644 --- a/src/core/dyad_core.h +++ b/src/core/dyad_core.h @@ -4,8 +4,8 @@ #include "dyad_envs.h" #include "dyad_rc.h" // Includes -#include "dyad_flux_log.h" #include "dyad_dtl.h" +#include "dyad_flux_log.h" #ifdef __cplusplus #include @@ -23,7 +23,7 @@ *****************************************************************************/ // Now defined in src/utils/utils.h -//#define DYAD_PATH_DELIM "/" +// #define DYAD_PATH_DELIM "/" #ifdef __cplusplus extern "C" { @@ -33,19 +33,19 @@ extern "C" { * @struct dyad_ctx */ struct dyad_ctx { - flux_t* h; // the Flux handle for DYAD - struct dyad_dtl *dtl_handle; // Opaque handle to DTL info - bool debug; // if true, perform debug logging - bool check; // if true, 
perform some check logging - bool reenter; // if false, do not recursively enter DYAD - bool initialized; // if true, DYAD is initialized - bool shared_storage; // if true, the managed path is shared - unsigned int key_depth; // Depth of bins for the Flux KVS - unsigned int key_bins; // Number of bins for the Flux KVS - uint32_t rank; // Flux rank for DYAD - char* kvs_namespace; // Flux KVS namespace for DYAD - char* prod_managed_path; // producer path managed by DYAD - char* cons_managed_path; // consumer path managed by DYAD + flux_t* h; // the Flux handle for DYAD + struct dyad_dtl* dtl_handle; // Opaque handle to DTL info + bool debug; // if true, perform debug logging + bool check; // if true, perform some check logging + bool reenter; // if false, do not recursively enter DYAD + bool initialized; // if true, DYAD is initialized + bool shared_storage; // if true, the managed path is shared + unsigned int key_depth; // Depth of bins for the Flux KVS + unsigned int key_bins; // Number of bins for the Flux KVS + uint32_t rank; // Flux rank for DYAD + char* kvs_namespace; // Flux KVS namespace for DYAD + char* prod_managed_path; // producer path managed by DYAD + char* cons_managed_path; // consumer path managed by DYAD }; DYAD_DLL_EXPORTED extern const struct dyad_ctx dyad_ctx_default; typedef struct dyad_ctx dyad_ctx_t; @@ -87,15 +87,15 @@ typedef struct dyad_ctx dyad_ctx_t; * @return An integer error code (values TBD) */ DYAD_DLL_EXPORTED dyad_rc_t dyad_init (bool debug, - bool check, - bool shared_storage, - unsigned int key_depth, - unsigned int key_bins, - const char* kvs_namespace, - const char* prod_managed_path, - const char* cons_managed_path, - dyad_dtl_mode_t dtl_mode, - dyad_ctx_t** ctx); + bool check, + bool shared_storage, + unsigned int key_depth, + unsigned int key_bins, + const char* kvs_namespace, + const char* prod_managed_path, + const char* cons_managed_path, + dyad_dtl_mode_t dtl_mode, + dyad_ctx_t** ctx); /** * @brief Intialize the DYAD 
context using environment variables @@ -115,7 +115,8 @@ DYAD_DLL_EXPORTED dyad_rc_t dyad_init_env (dyad_ctx_t** ctx); #if DYAD_PERFFLOW __attribute__ ((annotate ("@critical_path()"))) #endif -DYAD_DLL_EXPORTED dyad_rc_t dyad_produce (dyad_ctx_t* ctx, const char* fname); +DYAD_DLL_EXPORTED dyad_rc_t +dyad_produce (dyad_ctx_t* ctx, const char* fname); /** * @brief Wrapper function that performs all the common tasks needed @@ -128,7 +129,8 @@ DYAD_DLL_EXPORTED dyad_rc_t dyad_produce (dyad_ctx_t* ctx, const char* fname); #if DYAD_PERFFLOW __attribute__ ((annotate ("@critical_path()"))) #endif -DYAD_DLL_EXPORTED dyad_rc_t dyad_consume (dyad_ctx_t* ctx, const char* fname); +DYAD_DLL_EXPORTED dyad_rc_t +dyad_consume (dyad_ctx_t* ctx, const char* fname); /** * @brief Finalizes the DYAD instance and deallocates the context diff --git a/src/dtl/dyad_dtl.h b/src/dtl/dyad_dtl.h index ceca3f9c..9e1cecec 100644 --- a/src/dtl/dyad_dtl.h +++ b/src/dtl/dyad_dtl.h @@ -4,12 +4,8 @@ #ifdef __cplusplus extern "C" { #endif - -enum dyad_dtl_mode { - DYAD_DTL_UCX = 0, - DYAD_DTL_FLUX_RPC = 1, - DYAD_DTL_END = 2 -}; + +enum dyad_dtl_mode { DYAD_DTL_UCX = 0, DYAD_DTL_FLUX_RPC = 1, DYAD_DTL_END = 2 }; typedef enum dyad_dtl_mode dyad_dtl_mode_t; struct dyad_dtl; diff --git a/src/dtl/dyad_dtl_impl.c b/src/dtl/dyad_dtl_impl.c index 4f12b7b0..dd002909 100644 --- a/src/dtl/dyad_dtl_impl.c +++ b/src/dtl/dyad_dtl_impl.c @@ -1,10 +1,12 @@ #include "dyad_dtl_impl.h" -#include "ucx_dtl.h" #include "flux_dtl.h" +#include "ucx_dtl.h" -dyad_rc_t dyad_dtl_init(dyad_dtl_t **dtl_handle, - dyad_dtl_mode_t mode, flux_t *h, bool debug) +dyad_rc_t dyad_dtl_init (dyad_dtl_t **dtl_handle, + dyad_dtl_mode_t mode, + flux_t *h, + bool debug) { dyad_rc_t rc = DYAD_RC_OK; *dtl_handle = malloc (sizeof (struct dyad_dtl)); @@ -13,24 +15,14 @@ dyad_rc_t dyad_dtl_init(dyad_dtl_t **dtl_handle, } (*dtl_handle)->mode = mode; if (mode == DYAD_DTL_UCX) { - rc = dyad_dtl_ucx_init ( - *dtl_handle, - mode, - h, - debug - ); - if 
(DYAD_IS_ERROR(rc)) { + rc = dyad_dtl_ucx_init (*dtl_handle, mode, h, debug); + if (DYAD_IS_ERROR (rc)) { return rc; } return DYAD_RC_OK; } else if (mode == DYAD_DTL_FLUX_RPC) { - rc = dyad_dtl_flux_init ( - *dtl_handle, - mode, - h, - debug - ); - if (DYAD_IS_ERROR(rc)) { + rc = dyad_dtl_flux_init (*dtl_handle, mode, h, debug); + if (DYAD_IS_ERROR (rc)) { return rc; } return DYAD_RC_OK; @@ -38,7 +30,7 @@ dyad_rc_t dyad_dtl_init(dyad_dtl_t **dtl_handle, return DYAD_RC_BADDTLMODE; } -dyad_rc_t dyad_dtl_finalize(dyad_dtl_t **dtl_handle) +dyad_rc_t dyad_dtl_finalize (dyad_dtl_t **dtl_handle) { dyad_rc_t rc = DYAD_RC_OK; if (dtl_handle == NULL || *dtl_handle == NULL) @@ -50,14 +42,14 @@ dyad_rc_t dyad_dtl_finalize(dyad_dtl_t **dtl_handle) if ((*dtl_handle)->mode == DYAD_DTL_UCX) { if ((*dtl_handle)->private.ucx_dtl_handle != NULL) { rc = dyad_dtl_ucx_finalize (dtl_handle); - if (DYAD_IS_ERROR(rc)) { + if (DYAD_IS_ERROR (rc)) { return rc; } } } else if ((*dtl_handle)->mode == DYAD_DTL_FLUX_RPC) { if ((*dtl_handle)->private.flux_dtl_handle != NULL) { rc = dyad_dtl_flux_finalize (dtl_handle); - if (DYAD_IS_ERROR(rc)) { + if (DYAD_IS_ERROR (rc)) { return rc; } } diff --git a/src/dtl/dyad_dtl_impl.h b/src/dtl/dyad_dtl_impl.h index 18bfcc9f..b08ce739 100644 --- a/src/dtl/dyad_dtl_impl.h +++ b/src/dtl/dyad_dtl_impl.h @@ -1,13 +1,12 @@ #ifndef DYAD_DTL_IMPL_H #define DYAD_DTL_IMPL_H -#include "dyad_rc.h" -#include "dyad_flux_log.h" - #include #include #include "dyad_dtl.h" +#include "dyad_flux_log.h" +#include "dyad_rc.h" #ifdef __cplusplus #include @@ -29,9 +28,10 @@ union dyad_dtl_private { typedef union dyad_dtl_private dyad_dtl_private_t; enum dyad_dtl_comm_mode { - DYAD_COMM_NONE = 0, // Sanity check value for when connection isn't established - DYAD_COMM_RECV = 1, // DTL connection will only receive data - DYAD_COMM_SEND = 2, // DTL connection will only send data + DYAD_COMM_NONE = 0, // Sanity check value for when + // connection isn't established + DYAD_COMM_RECV = 1, 
// DTL connection will only receive data + DYAD_COMM_SEND = 2, // DTL connection will only send data DYAD_COMM_END = 3 }; typedef enum dyad_dtl_comm_mode dyad_dtl_comm_mode_t; @@ -39,24 +39,29 @@ typedef enum dyad_dtl_comm_mode dyad_dtl_comm_mode_t; struct dyad_dtl { dyad_dtl_private_t private; dyad_dtl_mode_t mode; - dyad_rc_t (*rpc_pack)(struct dyad_dtl* restrict self, const char* restrict upath, - uint32_t producer_rank, json_t** restrict packed_obj); - dyad_rc_t (*rpc_unpack)(struct dyad_dtl* self, - const flux_msg_t* packed_obj, char** upath); - dyad_rc_t (*rpc_respond)(struct dyad_dtl* self, const flux_msg_t* orig_msg); - dyad_rc_t (*rpc_recv_response)(struct dyad_dtl* self, flux_future_t* f); - dyad_rc_t (*establish_connection)(struct dyad_dtl* self, - dyad_dtl_comm_mode_t comm_mode); - dyad_rc_t (*send)(struct dyad_dtl* self, void* buf, size_t buflen); - dyad_rc_t (*recv)(struct dyad_dtl* self, void** buf, size_t* buflen); - dyad_rc_t (*close_connection)(struct dyad_dtl* self); + dyad_rc_t (*rpc_pack) (struct dyad_dtl* restrict self, + const char* restrict upath, + uint32_t producer_rank, + json_t** restrict packed_obj); + dyad_rc_t (*rpc_unpack) (struct dyad_dtl* self, + const flux_msg_t* packed_obj, + char** upath); + dyad_rc_t (*rpc_respond) (struct dyad_dtl* self, const flux_msg_t* orig_msg); + dyad_rc_t (*rpc_recv_response) (struct dyad_dtl* self, flux_future_t* f); + dyad_rc_t (*establish_connection) (struct dyad_dtl* self, + dyad_dtl_comm_mode_t comm_mode); + dyad_rc_t (*send) (struct dyad_dtl* self, void* buf, size_t buflen); + dyad_rc_t (*recv) (struct dyad_dtl* self, void** buf, size_t* buflen); + dyad_rc_t (*close_connection) (struct dyad_dtl* self); }; typedef struct dyad_dtl dyad_dtl_t; -dyad_rc_t dyad_dtl_init(dyad_dtl_t **dtl_handle, - dyad_dtl_mode_t mode, flux_t *h, bool debug); +dyad_rc_t dyad_dtl_init (dyad_dtl_t** dtl_handle, + dyad_dtl_mode_t mode, + flux_t* h, + bool debug); -dyad_rc_t dyad_dtl_finalize(dyad_dtl_t **dtl_handle); 
+dyad_rc_t dyad_dtl_finalize (dyad_dtl_t** dtl_handle); #ifdef __cplusplus } diff --git a/src/dtl/dyad_flux_log.h b/src/dtl/dyad_flux_log.h index 1e77bf1b..da196ef9 100644 --- a/src/dtl/dyad_flux_log.h +++ b/src/dtl/dyad_flux_log.h @@ -21,13 +21,12 @@ extern "C" { do { \ } while (0) #else -#define DYAD_LOG_INFO(dyad_ctx, ...) \ - flux_log (dyad_ctx->h, LOG_INFO, __VA_ARGS__) +#define DYAD_LOG_INFO(dyad_ctx, ...) flux_log (dyad_ctx->h, LOG_INFO, __VA_ARGS__) #define DYAD_LOG_ERR(dyad_ctx, ...) flux_log_error (dyad_ctx->h, __VA_ARGS__) #define FLUX_LOG_INFO(flux_ctx, ...) flux_log (flux_ctx, LOG_INFO, __VA_ARGS__) #define FLUX_LOG_ERR(flux_ctx, ...) flux_log_error (flux_ctx, __VA_ARGS__) #endif - + #ifdef __cplusplus } #endif diff --git a/src/dtl/dyad_rc.h b/src/dtl/dyad_rc.h index 1ecf3166..881be69a 100644 --- a/src/dtl/dyad_rc.h +++ b/src/dtl/dyad_rc.h @@ -2,7 +2,7 @@ #define DYAD_CORE_DYAD_RC_H #if BUILDING_DYAD -#define DYAD_DLL_EXPORTED __attribute__((__visibility__("default"))) +#define DYAD_DLL_EXPORTED __attribute__ ((__visibility__ ("default"))) #else #define DYAD_DLL_EXPORTED #endif @@ -12,26 +12,26 @@ extern "C" { #endif enum dyad_core_return_codes { - DYAD_RC_OK = 0, // Operation worked correctly - DYAD_RC_SYSFAIL = -1, // Some sys call or C standard - // library call failed - DYAD_RC_NOCTX = -2, // No DYAD Context found - DYAD_RC_FLUXFAIL = -3, // Some Flux function failed - DYAD_RC_BADCOMMIT = -4, // Flux KVS commit didn't work - DYAD_RC_BADLOOKUP = -5, // Flux KVS lookup didn't work - DYAD_RC_BADFETCH = -6, // Flux KVS commit didn't work - DYAD_RC_BADRESPONSE = -7, // Cannot create/populate a DYAD response - DYAD_RC_BADRPC = -8, // Flux RPC pack or get didn't work - DYAD_RC_BADFIO = -9, // File I/O failed - DYAD_RC_BADMANAGEDPATH = -10, // Cons or Prod Manged Path is bad - DYAD_RC_BADDTLMODE = -11, // Invalid DYAD DTL mode provided - DYAD_RC_BADPACK = -12, // JSON RPC payload packing failed - DYAD_RC_UCXINIT_FAIL = -13, // UCX initialization failed - 
DYAD_RC_UCXWAIT_FAIL = -14, // UCX wait (either custom or - // 'ucp_worker_wait') failed - DYAD_RC_UCXCOMM_FAIL = -15, // UCX communication routine failed - DYAD_RC_RPC_FINISHED = -16, // The Flux RPC responded with ENODATA (i.e., - // end of stream) sooner than expected + DYAD_RC_OK = 0, // Operation worked correctly + DYAD_RC_SYSFAIL = -1, // Some sys call or C standard + // library call failed + DYAD_RC_NOCTX = -2, // No DYAD Context found + DYAD_RC_FLUXFAIL = -3, // Some Flux function failed + DYAD_RC_BADCOMMIT = -4, // Flux KVS commit didn't work + DYAD_RC_BADLOOKUP = -5, // Flux KVS lookup didn't work + DYAD_RC_BADFETCH = -6, // Flux KVS commit didn't work + DYAD_RC_BADRESPONSE = -7, // Cannot create/populate a DYAD response + DYAD_RC_BADRPC = -8, // Flux RPC pack or get didn't work + DYAD_RC_BADFIO = -9, // File I/O failed + DYAD_RC_BADMANAGEDPATH = -10, // Cons or Prod Manged Path is bad + DYAD_RC_BADDTLMODE = -11, // Invalid DYAD DTL mode provided + DYAD_RC_BADPACK = -12, // JSON RPC payload packing failed + DYAD_RC_UCXINIT_FAIL = -13, // UCX initialization failed + DYAD_RC_UCXWAIT_FAIL = -14, // UCX wait (either custom or + // 'ucp_worker_wait') failed + DYAD_RC_UCXCOMM_FAIL = -15, // UCX communication routine failed + DYAD_RC_RPC_FINISHED = -16, // The Flux RPC responded with ENODATA (i.e., + // end of stream) sooner than expected }; typedef enum dyad_core_return_codes dyad_rc_t; diff --git a/src/dtl/flux_dtl.c b/src/dtl/flux_dtl.c index ac945241..39b4930d 100644 --- a/src/dtl/flux_dtl.c +++ b/src/dtl/flux_dtl.c @@ -1,9 +1,11 @@ #include "flux_dtl.h" dyad_rc_t dyad_dtl_flux_init (dyad_dtl_t* dtl_handle, - dyad_dtl_mode_t mode, flux_t* h, bool debug) + dyad_dtl_mode_t mode, + flux_t* h, + bool debug) { - self->private.flux_dtl_handle = malloc(sizeof(struct dyad_dtl_flux)); + self->private.flux_dtl_handle = malloc (sizeof (struct dyad_dtl_flux)); if (self->private.flux_dtl_handle == NULL) { FLUX_LOG_ERR (h, "Cannot allocate the Flux DTL handle\n"); return 
DYAD_RC_SYSFAIL; @@ -12,7 +14,7 @@ dyad_rc_t dyad_dtl_flux_init (dyad_dtl_t* dtl_handle, self->private.flux_dtl_handle->debug = debug; self->private.flux_dtl_handle->f = NULL; self->private.flux_dtl_handle->msg = NULL; - + self->rpc_pack = dyad_dtl_flux_rpc_pack; self->rpc_unpack = dyad_dtl_flux_rpc_unpack; self->rpc_respond = dyad_dtl_flux_rpc_respond; @@ -21,19 +23,17 @@ dyad_rc_t dyad_dtl_flux_init (dyad_dtl_t* dtl_handle, self->send = dyad_dtl_flux_send; self->recv = dyad_dtl_flux_recv; self->close_connection = dyad_dtl_flux_close_connection; - + return DYAD_RC_OK; } -dyad_rc_t dyad_dtl_flux_rpc_pack (dyad_dtl_t* restrict self, const char* restrict upath, - uint32_t producer_rank, json_t** restrict packed_obj) +dyad_rc_t dyad_dtl_flux_rpc_pack (dyad_dtl_t* restrict self, + const char* restrict upath, + uint32_t producer_rank, + json_t** restrict packed_obj) { dyad_dtl_flux_t* dtl_handle = self->private.flux_dtl_handle; - *packed_obj = json_pack( - "{s:s}", - "upath", - upath - ); + *packed_obj = json_pack ("{s:s}", "upath", upath); if (*packed_obj == NULL) { FLUX_LOG_ERR (dtl_handle->h, "Could not pack upath for Flux DTL\n"); return DYAD_RC_BADPACK; @@ -42,21 +42,14 @@ dyad_rc_t dyad_dtl_flux_rpc_pack (dyad_dtl_t* restrict self, const char* restric } dyad_rc_t dyad_dtl_flux_rpc_unpack (dyad_dtl_t* self, - const flux_msg_t* msg, char** upath) + const flux_msg_t* msg, + char** upath) { int errcode = 0; - errcode = flux_request_unpack ( - msg, - NULL, - "{s:s}", - "upath", - upath - ); + errcode = flux_request_unpack (msg, NULL, "{s:s}", "upath", upath); if (errcode < 0) { - FLUX_LOG_ERR ( - self->private.flux_dtl_handle->h, - "Could not unpack Flux message from consumer\n" - ); + FLUX_LOG_ERR (self->private.flux_dtl_handle->h, + "Could not unpack Flux message from consumer\n"); // TODO create new RC for this return -1; } @@ -64,8 +57,7 @@ dyad_rc_t dyad_dtl_flux_rpc_unpack (dyad_dtl_t* self, return DYAD_RC_OK; } -dyad_rc_t dyad_dtl_flux_rpc_respond (dyad_dtl_t* 
self, - const flux_msg_t* orig_msg) +dyad_rc_t dyad_dtl_flux_rpc_respond (dyad_dtl_t* self, const flux_msg_t* orig_msg) { return DYAD_RC_OK; } @@ -77,7 +69,7 @@ dyad_rc_t dyad_dtl_flux_rpc_recv_response (dyad_dtl_t* self, flux_future_t* f) } dyad_rc_t dyad_dtl_flux_establish_connection (dyad_dtl_t* self, - dyad_dtl_comm_mode_t comm_mode) + dyad_dtl_comm_mode_t comm_mode) { return DYAD_RC_OK; } @@ -85,28 +77,21 @@ dyad_rc_t dyad_dtl_flux_establish_connection (dyad_dtl_t* self, dyad_rc_t dyad_dtl_flux_send (dyad_dtl_t* self, void* buf, size_t buflen) { int errcode = 0; - FLUX_LOG_INFO ( - self->private.flux_dtl_handle->h, - "Send data to consumer using a Flux RPC response" - ); - errcode = flux_respond_raw ( - self->private.flux_dtl_handle->h, - self->private.flux_dtl_handle->msg, - buf, - (int) buflen - ); + FLUX_LOG_INFO (self->private.flux_dtl_handle->h, + "Send data to consumer using a Flux RPC response"); + errcode = flux_respond_raw (self->private.flux_dtl_handle->h, + self->private.flux_dtl_handle->msg, + buf, + (int)buflen); if (errcode < 0) { - FLUX_LOG_ERR ( - self->private.flux_dtl_handle->h, - "Could not send Flux RPC response containing file contents\n" - ); + FLUX_LOG_ERR (self->private.flux_dtl_handle->h, + "Could not send Flux RPC response containing file " + "contents\n"); return DYAD_RC_FLUXFAIL; } if (self->private.flux_dtl_handle->debug) { - FLUX_LOG_INFO ( - self->private.flux_dtl_handle->h, - "Successfully sent file contents to consumer\n" - ); + FLUX_LOG_INFO (self->private.flux_dtl_handle->h, + "Successfully sent file contents to consumer\n"); } return DYAD_RC_OK; } @@ -118,11 +103,12 @@ dyad_rc_t dyad_dtl_flux_recv (dyad_dtl_t* self, void** buf, size_t* buflen) dyad_dtl_flux_t* dtl_handle = self->private.flux_dtl_handle; FLUX_LOG_INFO (dtl_handle->h, "Get file contents from module using Flux RPC\n"); if (dtl_handle->f == NULL) { - FLUX_LOG_ERR (dtl_handle->h, "Cannot get data using RPC without a Flux future\n"); + FLUX_LOG_ERR (dtl_handle->h, 
+ "Cannot get data using RPC without a Flux future\n"); // TODO create new RC for this return -1; } - rc = flux_rpc_get_raw(dtl_handle->f, (const void**) buf, (int*) buflen); + rc = flux_rpc_get_raw (dtl_handle->f, (const void**)buf, (int*)buflen); if (rc < 0) { FLUX_LOG_ERR (dtl_handle->h, "Could not get file data from Flux RPC\n"); if (errno == ENODATA) @@ -148,7 +134,7 @@ dyad_rc_t dyad_dtl_flux_finalize (dyad_dtl_t** self) (*self)->private.flux_dtl_handle->h = NULL; (*self)->private.flux_dtl_handle->f = NULL; (*self)->private.flux_dtl_handle->msg = NULL; - free((*self)->private.flux_dtl_handle); + free ((*self)->private.flux_dtl_handle); (*self)->private.flux_dtl_handle = NULL; return DYAD_RC_OK; } \ No newline at end of file diff --git a/src/dtl/flux_dtl.h b/src/dtl/flux_dtl.h index 34dd14a6..f58580b6 100644 --- a/src/dtl/flux_dtl.h +++ b/src/dtl/flux_dtl.h @@ -1,34 +1,39 @@ #ifndef DYAD_DTL_FLUX_H #define DYAD_DTL_FLUX_H -#include "dyad_dtl_impl.h" - #include +#include "dyad_dtl_impl.h" + struct dyad_dtl_flux { flux_t* h; bool debug; - flux_future_t *f; + flux_future_t* f; flux_msg_t* msg; }; typedef struct dyad_dtl_flux dyad_dtl_flux_t; dyad_rc_t dyad_dtl_flux_init (dyad_dtl_t* self, - dyad_dtl_mode_t mode, flux_t* h, bool debug); + dyad_dtl_mode_t mode, + flux_t* h, + bool debug); -dyad_rc_t dyad_dtl_flux_rpc_pack (dyad_dtl_t* restrict self, const char* restrict upath, - uint32_t producer_rank, json_t** restrict packed_obj); +dyad_rc_t dyad_dtl_flux_rpc_pack (dyad_dtl_t* restrict self, + const char* restrict upath, + uint32_t producer_rank, + json_t** restrict packed_obj); dyad_rc_t dyad_dtl_flux_rpc_unpack (dyad_dtl_t* self, - const flux_msg_t* msg, char** upath); + const flux_msg_t* msg, + char** upath); dyad_rc_t dyad_dtl_flux_rpc_respond (dyad_dtl_t* self, const flux_msg_t* orig_msg); dyad_rc_t dyad_dtl_flux_rpc_recv_response (dyad_dtl_t* self, flux_future_t* f); dyad_rc_t dyad_dtl_flux_establish_connection (dyad_dtl_t* self, - dyad_dtl_comm_mode_t 
comm_mode); + dyad_dtl_comm_mode_t comm_mode); dyad_rc_t dyad_dtl_flux_send (dyad_dtl_t* self, void* buf, size_t buflen); diff --git a/src/dtl/ucx_dtl.c b/src/dtl/ucx_dtl.c index e92a014a..df691962 100644 --- a/src/dtl/ucx_dtl.c +++ b/src/dtl/ucx_dtl.c @@ -1,12 +1,12 @@ #include "ucx_dtl.h" -#include "base64.h" - -#include #include #include +#include #include +#include "base64.h" + extern const base64_maps_t base64_maps_rfc4648; // Tag mask for UCX Tag send/recv @@ -25,9 +25,9 @@ typedef struct ucx_request dyad_ucx_request_t; // Define a function that UCX will use to allocate and // initialize our request struct -static void dyad_ucx_request_init(void *request) +static void dyad_ucx_request_init (void* request) { - dyad_ucx_request_t *real_request = NULL; + dyad_ucx_request_t* real_request = NULL; real_request = (dyad_ucx_request_t*)request; real_request->completed = 0; } @@ -35,42 +35,45 @@ static void dyad_ucx_request_init(void *request) // Define a function that ucp_tag_msg_recv_nbx will use // as a callback to signal the completion of the async receive #if UCP_API_VERSION >= UCP_VERSION(1, 10) -static void dyad_recv_callback(void *request, ucs_status_t status, - const ucp_tag_recv_info_t *tag_info, void *user_data) +static void dyad_recv_callback (void* request, + ucs_status_t status, + const ucp_tag_recv_info_t* tag_info, + void* user_data) #else -static void dyad_recv_callback(void *request, ucs_status_t status, - ucp_tag_recv_info_t *tag_info) +static void dyad_recv_callback (void* request, + ucs_status_t status, + ucp_tag_recv_info_t* tag_info) #endif { - dyad_ucx_request_t *real_request = NULL; - real_request = (dyad_ucx_request_t*) request; + dyad_ucx_request_t* real_request = NULL; + real_request = (dyad_ucx_request_t*)request; real_request->completed = 1; } #if UCP_API_VERSION >= UCP_VERSION(1, 10) -static void dyad_send_callback(void *req, ucs_status_t status, void *ctx) +static void dyad_send_callback (void* req, ucs_status_t status, void* ctx) #else 
-static void dyad_send_callback(void *req, ucs_status_t status) +static void dyad_send_callback (void* req, ucs_status_t status) #endif { - dyad_ucx_request_t *real_req = (dyad_ucx_request_t*)req; + dyad_ucx_request_t* real_req = (dyad_ucx_request_t*)req; real_req->completed = 1; } -void dyad_ucx_ep_err_handler (void* arg, ucp_ep_h ep, ucs_status_t status) { - flux_t* h = (flux_t*) arg; +void dyad_ucx_ep_err_handler (void* arg, ucp_ep_h ep, ucs_status_t status) +{ + flux_t* h = (flux_t*)arg; FLUX_LOG_ERR (h, "An error occured on the UCP endpoint (status = %d)\n", status); } // Simple function used to wait on the async receive -static ucs_status_t dyad_ucx_request_wait(dyad_dtl_ucx_t *dtl_handle, - dyad_ucx_request_t *request) +static ucs_status_t dyad_ucx_request_wait (dyad_dtl_ucx_t* dtl_handle, + dyad_ucx_request_t* request) { ucs_status_t final_request_status = UCS_OK; - // If 'request' is actually a request handle, this means the communication operation - // is scheduled, but not yet completed. - if (UCS_PTR_IS_PTR(request)) - { + // If 'request' is actually a request handle, this means the communication + // operation is scheduled, but not yet completed. + if (UCS_PTR_IS_PTR (request)) { // Spin lock until the request is completed // The spin lock shouldn't be costly (performance-wise) // because the wait should always come directly after other UCX calls @@ -78,21 +81,20 @@ static ucs_status_t dyad_ucx_request_wait(dyad_dtl_ucx_t *dtl_handle, // In other words, prior UCX calls should mean that this loop only runs // a couple of times at most. 
do { - ucp_worker_progress(dtl_handle->ucx_worker); + ucp_worker_progress (dtl_handle->ucx_worker); // usleep(100); // Get the final status of the communication operation - final_request_status = ucp_request_check_status(request); + final_request_status = ucp_request_check_status (request); } while (final_request_status == UCS_INPROGRESS); // Free and deallocate the request object - ucp_request_free(request); + ucp_request_free (request); return final_request_status; } // If 'request' is actually a UCX error, this means the communication - // operation immediately failed. In that case, we simply grab the 'ucs_status_t' - // object for the error. - else if (UCS_PTR_IS_ERR(request)) - { - return UCS_PTR_STATUS(request); + // operation immediately failed. In that case, we simply grab the + // 'ucs_status_t' object for the error. + else if (UCS_PTR_IS_ERR (request)) { + return UCS_PTR_STATUS (request); } // If 'request' is neither a request handle nor an error, then // the communication operation immediately completed successfully. 
@@ -100,21 +102,23 @@ static ucs_status_t dyad_ucx_request_wait(dyad_dtl_ucx_t *dtl_handle, return UCS_OK; } -static inline dyad_rc_t dyad_dtl_ucx_finalize_impl (dyad_dtl_ucx_t **dtl_handle) +static inline dyad_rc_t dyad_dtl_ucx_finalize_impl (dyad_dtl_ucx_t** dtl_handle) { } -dyad_rc_t dyad_dtl_ucx_init(dyad_dtl_t* self, dyad_dtl_mode_t mode, - flux_t *h, bool debug) +dyad_rc_t dyad_dtl_ucx_init (dyad_dtl_t* self, + dyad_dtl_mode_t mode, + flux_t* h, + bool debug) { ucp_params_t ucx_params; ucp_worker_params_t worker_params; - ucp_config_t *config; + ucp_config_t* config; ucs_status_t status; ucp_worker_attr_t worker_attrs; dyad_dtl_ucx_t* dtl_handle = NULL; - self->private.ucx_dtl_handle = malloc(sizeof(struct dyad_dtl_ucx)); + self->private.ucx_dtl_handle = malloc (sizeof (struct dyad_dtl_ucx)); if (self->private.ucx_dtl_handle == NULL) { FLUX_LOG_ERR (h, "Could not allocate UCX DTL context\n"); return DYAD_RC_SYSFAIL; @@ -135,7 +139,7 @@ dyad_rc_t dyad_dtl_ucx_init(dyad_dtl_t* self, dyad_dtl_mode_t mode, // Read the UCX configuration FLUX_LOG_INFO (dtl_handle->h, "Reading UCP config\n"); status = ucp_config_read (NULL, NULL, &config); - if (UCX_STATUS_FAIL(status)) { + if (UCX_STATUS_FAIL (status)) { FLUX_LOG_ERR (dtl_handle->h, "Could not read the UCX config\n"); goto error; } @@ -149,32 +153,26 @@ dyad_rc_t dyad_dtl_ucx_init(dyad_dtl_t* self, dyad_dtl_mode_t mode, // * Remote Memory Access communication // * Auto initialization of request objects // * Worker sleep, wakeup, poll, etc. 
features - ucx_params.field_mask = UCP_PARAM_FIELD_FEATURES | - UCP_PARAM_FIELD_REQUEST_SIZE | - UCP_PARAM_FIELD_REQUEST_INIT; + ucx_params.field_mask = UCP_PARAM_FIELD_FEATURES | UCP_PARAM_FIELD_REQUEST_SIZE + | UCP_PARAM_FIELD_REQUEST_INIT; ucx_params.features = UCP_FEATURE_TAG | // UCP_FEATURE_RMA | UCP_FEATURE_WAKEUP; - ucx_params.request_size = sizeof(struct ucx_request); + ucx_params.request_size = sizeof (struct ucx_request); ucx_params.request_init = dyad_ucx_request_init; // Initialize UCX FLUX_LOG_INFO (dtl_handle->h, "Initializing UCP\n"); - status = ucp_init(&ucx_params, config, &dtl_handle->ucx_ctx); + status = ucp_init (&ucx_params, config, &dtl_handle->ucx_ctx); // If in debug mode, print the configuration of UCX to stderr if (debug) { - ucp_config_print( - config, - stderr, - "UCX Configuration", - UCS_CONFIG_PRINT_CONFIG - ); + ucp_config_print (config, stderr, "UCX Configuration", UCS_CONFIG_PRINT_CONFIG); } // Release the config - ucp_config_release(config); + ucp_config_release (config); // Log an error if UCX initialization failed - if (UCX_STATUS_FAIL(status)) { + if (UCX_STATUS_FAIL (status)) { FLUX_LOG_ERR (h, "ucp_init failed (status = %d)\n", status); goto error; } @@ -184,38 +182,35 @@ dyad_rc_t dyad_dtl_ucx_init(dyad_dtl_t* self, dyad_dtl_mode_t mode, // The settings enabled are: // * Single-threaded mode (TODO look into multi-threading support) // * Restricting wakeup events to only include Tag-matching recv events - worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE | - UCP_WORKER_PARAM_FIELD_EVENTS; + worker_params.field_mask = + UCP_WORKER_PARAM_FIELD_THREAD_MODE | UCP_WORKER_PARAM_FIELD_EVENTS; // TODO look into multi-threading support worker_params.thread_mode = UCS_THREAD_MODE_SINGLE; worker_params.events = UCP_WAKEUP_TAG_RECV; // Create the worker and log an error if that fails FLUX_LOG_INFO (dtl_handle->h, "Creating UCP worker\n"); - status = ucp_worker_create( - dtl_handle->ucx_ctx, - &worker_params, - 
&(dtl_handle->ucx_worker) - ); - if (UCX_STATUS_FAIL(status)) { - FLUX_LOG_ERR (dtl_handle->h, "ucp_worker_create failed (status = %d)!\n", status); + status = ucp_worker_create (dtl_handle->ucx_ctx, + &worker_params, + &(dtl_handle->ucx_worker)); + if (UCX_STATUS_FAIL (status)) { + FLUX_LOG_ERR (dtl_handle->h, + "ucp_worker_create failed (status = %d)!\n", + status); goto error; } // Query the worker for its address worker_attrs.field_mask = UCP_WORKER_ATTR_FIELD_ADDRESS; FLUX_LOG_INFO (dtl_handle->h, "Get address of UCP worker\n"); - status = ucp_worker_query( - dtl_handle->ucx_worker, - &worker_attrs - ); - if (UCX_STATUS_FAIL(status)) { + status = ucp_worker_query (dtl_handle->ucx_worker, &worker_attrs); + if (UCX_STATUS_FAIL (status)) { FLUX_LOG_ERR (h, "Cannot get UCX worker address (status = %d)!\n", status); goto error; } dtl_handle->consumer_address = worker_attrs.address; dtl_handle->addr_len = worker_attrs.address_length; - + self->rpc_pack = dyad_dtl_ucx_rpc_pack; self->rpc_unpack = dyad_dtl_ucx_rpc_unpack; self->rpc_respond = dyad_dtl_ucx_rpc_respond; @@ -234,8 +229,10 @@ error:; return DYAD_RC_UCXINIT_FAIL; } -dyad_rc_t dyad_dtl_ucx_rpc_pack(dyad_dtl_t* restrict self, const char* restrict upath, - uint32_t producer_rank, json_t** restrict packed_obj) +dyad_rc_t dyad_dtl_ucx_rpc_pack (dyad_dtl_t* restrict self, + const char* restrict upath, + uint32_t producer_rank, + json_t** restrict packed_obj) { size_t enc_len = 0; char* enc_buf = NULL; @@ -246,10 +243,10 @@ dyad_rc_t dyad_dtl_ucx_rpc_pack(dyad_dtl_t* restrict self, const char* restrict return DYAD_RC_BADPACK; } FLUX_LOG_INFO (dtl_handle->h, "Encode UCP address using base64\n"); - enc_len = base64_encoded_length(dtl_handle->addr_len); + enc_len = base64_encoded_length (dtl_handle->addr_len); // Add 1 to encoded length because the encoded buffer will be // packed as if it is a string - enc_buf = malloc(enc_len+1); + enc_buf = malloc (enc_len + 1); if (enc_buf == NULL) { FLUX_LOG_ERR (dtl_handle->h, 
"Could not allocate buffer for packed address\n"); return DYAD_RC_SYSFAIL; @@ -257,12 +254,14 @@ dyad_rc_t dyad_dtl_ucx_rpc_pack(dyad_dtl_t* restrict self, const char* restrict // consumer_address is casted to const char* to avoid warnings // This is valid because it is a pointer to an opaque struct, // so the cast can be treated like a void*->char* cast. - enc_size = base64_encode_using_maps(&base64_maps_rfc4648, - enc_buf, enc_len+1, - (const char*)dtl_handle->consumer_address, dtl_handle->addr_len); + enc_size = base64_encode_using_maps (&base64_maps_rfc4648, + enc_buf, + enc_len + 1, + (const char*)dtl_handle->consumer_address, + dtl_handle->addr_len); if (enc_size < 0) { // TODO log error - free(enc_buf); + free (enc_buf); return DYAD_RC_BADPACK; } FLUX_LOG_INFO (dtl_handle->h, "Creating UCP tag for tag matching\n"); @@ -271,7 +270,7 @@ dyad_rc_t dyad_dtl_ucx_rpc_pack(dyad_dtl_t* restrict self, const char* restrict // Instead, we use this function to create the tag that will be // used for the upcoming communication. 
uint32_t consumer_rank = 0; - if (flux_get_rank(dtl_handle->h, &consumer_rank) < 0) { + if (flux_get_rank (dtl_handle->h, &consumer_rank) < 0) { FLUX_LOG_ERR (dtl_handle->h, "Cannot get consumer rank\n"); return DYAD_RC_FLUXFAIL; } @@ -282,18 +281,17 @@ dyad_rc_t dyad_dtl_ucx_rpc_pack(dyad_dtl_t* restrict self, const char* restrict // Use Jansson to pack the tag and UCX address into // the payload to be sent via RPC to the producer plugin FLUX_LOG_INFO (dtl_handle->h, "Packing RPC payload for UCX DTL\n"); - *packed_obj = json_pack( - "{s:s, s:i, s:i, s:s%}", - "upath", - upath, - "tag_prod", - (int) producer_rank, - "tag_cons", - (int) consumer_rank, - "ucx_addr", - enc_buf, enc_len - ); - free(enc_buf); + *packed_obj = json_pack ("{s:s, s:i, s:i, s:s%}", + "upath", + upath, + "tag_prod", + (int)producer_rank, + "tag_cons", + (int)consumer_rank, + "ucx_addr", + enc_buf, + enc_len); + free (enc_buf); // If the packing failed, log an error if (*packed_obj == NULL) { FLUX_LOG_ERR (dtl_handle->h, "Could not pack upath and UCX address for RPC\n"); @@ -302,7 +300,9 @@ dyad_rc_t dyad_dtl_ucx_rpc_pack(dyad_dtl_t* restrict self, const char* restrict return DYAD_RC_OK; } -dyad_rc_t dyad_dtl_ucx_rpc_unpack (dyad_dtl_t* self, const flux_msg_t* msg, char** upath) +dyad_rc_t dyad_dtl_ucx_rpc_unpack (dyad_dtl_t* self, + const flux_msg_t* msg, + char** upath) { char* enc_addr = NULL; size_t enc_addr_len = 0; @@ -312,42 +312,42 @@ dyad_rc_t dyad_dtl_ucx_rpc_unpack (dyad_dtl_t* self, const flux_msg_t* msg, char ssize_t decoded_len = 0; dyad_dtl_ucx_t* dtl_handle = self->private.ucx_dtl_handle; FLUX_LOG_INFO (dtl_handle->h, "Unpacking RPC payload\n"); - errcode = flux_request_unpack( - msg, - NULL, - "{s:s, s:i, s:i, s:s%}", - "upath", - upath, - "tag_prod", - &tag_prod, - "tag_cons", - &tag_cons, - "ucx_addr", - &enc_addr, - &enc_addr_len - ); - if (errcode < 0) - { - FLUX_LOG_ERR(dtl_handle->h, "Could not unpack Flux message from consumer!\n"); + errcode = flux_request_unpack (msg, 
+ NULL, + "{s:s, s:i, s:i, s:s%}", + "upath", + upath, + "tag_prod", + &tag_prod, + "tag_cons", + &tag_cons, + "ucx_addr", + &enc_addr, + &enc_addr_len); + if (errcode < 0) { + FLUX_LOG_ERR (dtl_handle->h, "Could not unpack Flux message from consumer!\n"); return -1; } dtl_handle->comm_tag = ((uint64_t)tag_prod << 32) | (uint64_t)tag_cons; FLUX_LOG_INFO (dtl_handle->h, "Obtained upath from RPC payload: %s\n", upath); - FLUX_LOG_INFO (dtl_handle->h, "Obtained UCP tag from RPC payload: %lu\n", dtl_handle->comm_tag); + FLUX_LOG_INFO (dtl_handle->h, + "Obtained UCP tag from RPC payload: %lu\n", + dtl_handle->comm_tag); FLUX_LOG_INFO (dtl_handle->h, "Decoding consumer UCP address using base64\n"); - dtl_handle->addr_len = base64_decoded_length(enc_addr_len); - dtl_handle->consumer_address = (ucp_address_t*) malloc(dtl_handle->addr_len); + dtl_handle->addr_len = base64_decoded_length (enc_addr_len); + dtl_handle->consumer_address = (ucp_address_t*)malloc (dtl_handle->addr_len); if (dtl_handle->consumer_address == NULL) { FLUX_LOG_ERR (dtl_handle->h, "Could not allocate memory for consumer address"); return DYAD_RC_SYSFAIL; } decoded_len = base64_decode_using_maps (&base64_maps_rfc4648, - (char*)dtl_handle->consumer_address, dtl_handle->addr_len, - enc_addr, enc_addr_len); - if (decoded_len < 0) - { + (char*)dtl_handle->consumer_address, + dtl_handle->addr_len, + enc_addr, + enc_addr_len); + if (decoded_len < 0) { // TODO log error - free(dtl_handle->consumer_address); + free (dtl_handle->consumer_address); dtl_handle->consumer_address = NULL; dtl_handle->addr_len = 0; return -1; @@ -360,45 +360,43 @@ dyad_rc_t dyad_dtl_ucx_rpc_respond (dyad_dtl_t* self, const flux_msg_t* orig_msg return DYAD_RC_OK; } -dyad_rc_t dyad_dtl_ucx_rpc_recv_response(dyad_dtl_t *self, - flux_future_t *f) +dyad_rc_t dyad_dtl_ucx_rpc_recv_response (dyad_dtl_t* self, flux_future_t* f) { return DYAD_RC_OK; } -dyad_rc_t dyad_dtl_ucx_establish_connection(dyad_dtl_t *self, - dyad_dtl_comm_mode_t 
comm_mode) +dyad_rc_t dyad_dtl_ucx_establish_connection (dyad_dtl_t* self, + dyad_dtl_comm_mode_t comm_mode) { ucp_ep_params_t params; ucs_status_t status = UCS_OK; dyad_dtl_ucx_t* dtl_handle = self->private.ucx_dtl_handle; dtl_handle->curr_comm_mode = comm_mode; if (comm_mode == DYAD_COMM_SEND) { - params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS | - UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE | - UCP_EP_PARAM_FIELD_ERR_HANDLER; + params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS + | UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE + | UCP_EP_PARAM_FIELD_ERR_HANDLER; params.address = dtl_handle->consumer_address; params.err_mode = UCP_ERR_HANDLING_MODE_PEER; params.err_handler.cb = dyad_ucx_ep_err_handler; - params.err_handler.arg = (void*) dtl_handle->h; - FLUX_LOG_INFO (dtl_handle->h, "Create UCP endpoint for communication with consumer\n"); - status = ucp_ep_create( - dtl_handle->ucx_worker, - ¶ms, - &dtl_handle->ep - ); - if (status != UCS_OK) - { - FLUX_LOG_ERR (dtl_handle->h, "ucp_ep_create failed with status %d\n", (int) status); + params.err_handler.arg = (void*)dtl_handle->h; + FLUX_LOG_INFO (dtl_handle->h, + "Create UCP endpoint for communication with consumer\n"); + status = ucp_ep_create (dtl_handle->ucx_worker, ¶ms, &dtl_handle->ep); + if (status != UCS_OK) { + FLUX_LOG_ERR (dtl_handle->h, + "ucp_ep_create failed with status %d\n", + (int)status); return -1; } - if (dtl_handle->debug) - { - ucp_ep_print_info(dtl_handle->ep, stderr); + if (dtl_handle->debug) { + ucp_ep_print_info (dtl_handle->ep, stderr); } return DYAD_RC_OK; } else if (comm_mode == DYAD_COMM_RECV) { - FLUX_LOG_INFO (dtl_handle->h, "No explicit connection establishment needed for UCX receiver\n"); + FLUX_LOG_INFO (dtl_handle->h, + "No explicit connection establishment needed for UCX " + "receiver\n"); return DYAD_RC_OK; } else { FLUX_LOG_ERR (dtl_handle->h, "Invalid communication mode: %d\n", comm_mode); @@ -411,52 +409,51 @@ dyad_rc_t dyad_dtl_ucx_send (dyad_dtl_t* self, void* buf, size_t 
buflen) { ucs_status_ptr_t stat_ptr; ucs_status_t status = UCS_OK; - dyad_ucx_request_t *req = NULL; + dyad_ucx_request_t* req = NULL; dyad_dtl_ucx_t* dtl_handle = self->private.ucx_dtl_handle; - if (dtl_handle->ep == NULL) - { - FLUX_LOG_INFO(dtl_handle->h, "UCP endpoint was not created prior to invoking send!\n"); + if (dtl_handle->ep == NULL) { + FLUX_LOG_INFO (dtl_handle->h, + "UCP endpoint was not created prior to invoking " + "send!\n"); return DYAD_RC_UCXCOMM_FAIL; } // ucp_tag_send_sync_nbx is the prefered version of this send since UCX 1.9 // However, some systems (e.g., Lassen) may have an older verison - // This conditional compilation will use ucp_tag_send_sync_nbx if using UCX 1.9+, - // and it will use the deprecated ucp_tag_send_sync_nb if using UCX < 1.9. + // This conditional compilation will use ucp_tag_send_sync_nbx if using + // UCX 1.9+, and it will use the deprecated ucp_tag_send_sync_nb if using + // UCX < 1.9. #if UCP_API_VERSION >= UCP_VERSION(1, 10) ucp_request_param_t params; params.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK; params.cb.send = dyad_send_callback; FLUX_LOG_INFO (dtl_handle->h, "Sending data to consumer with ucp_tag_send_nbx\n"); - stat_ptr = ucp_tag_send_nbx( - dtl_handle->ep, - buf, - buflen, - dtl_handle->comm_tag, - ¶ms - ); + stat_ptr = + ucp_tag_send_nbx (dtl_handle->ep, buf, buflen, dtl_handle->comm_tag, ¶ms); #else - FLUX_LOG_INFO (dtl_handle->h, "Sending %lu bytes of data to consumer with ucp_tag_send_nb\n", buflen); - stat_ptr = ucp_tag_send_nb( - dtl_handle->ep, - buf, - buflen, - UCP_DATATYPE_CONTIG, - dtl_handle->comm_tag, - dyad_send_callback - ); + FLUX_LOG_INFO (dtl_handle->h, + "Sending %lu bytes of data to consumer with " + "ucp_tag_send_nb\n", + buflen); + stat_ptr = ucp_tag_send_nb (dtl_handle->ep, + buf, + buflen, + UCP_DATATYPE_CONTIG, + dtl_handle->comm_tag, + dyad_send_callback); #endif FLUX_LOG_INFO (dtl_handle->h, "Processing UCP send request\n"); status = dyad_ucx_request_wait (dtl_handle, 
stat_ptr); - if (status != UCS_OK) - { - FLUX_LOG_ERR (dtl_handle->h, "UCP Tag Send failed (status = %d)!\n", (int)status); + if (status != UCS_OK) { + FLUX_LOG_ERR (dtl_handle->h, + "UCP Tag Send failed (status = %d)!\n", + (int)status); return DYAD_RC_UCXCOMM_FAIL; } FLUX_LOG_INFO (dtl_handle->h, "Data send with UCP succeeded\n"); return DYAD_RC_OK; } -dyad_rc_t dyad_dtl_ucx_recv(dyad_dtl_t *self, void **buf, size_t *buflen) +dyad_rc_t dyad_dtl_ucx_recv (dyad_dtl_t* self, void** buf, size_t* buflen) { ucs_status_t status = UCS_OK; ucp_tag_message_h msg = NULL; @@ -467,26 +464,24 @@ dyad_rc_t dyad_dtl_ucx_recv(dyad_dtl_t *self, void **buf, size_t *buflen) // TODO replace this loop with a resiliency response over RPC do { ucp_worker_progress (dtl_handle->ucx_worker); - msg = ucp_tag_probe_nb( - dtl_handle->ucx_worker, - dtl_handle->comm_tag, - DYAD_UCX_TAG_MASK, - 1, // Remove the message from UCP tracking - // Requires calling ucp_tag_msg_recv_nb - // with the ucp_tag_message_h to retrieve message - &msg_info - ); + msg = ucp_tag_probe_nb (dtl_handle->ucx_worker, + dtl_handle->comm_tag, + DYAD_UCX_TAG_MASK, + 1, // Remove the message from UCP tracking + // Requires calling ucp_tag_msg_recv_nb + // with the ucp_tag_message_h to retrieve message + &msg_info); } while (msg == NULL); - // TODO: This version of the polling code is not supposed to spin-lock, unlike the code above. - // Currently, it does not work. Once it starts working, we can replace the code above - // with a version of this code. + // TODO: This version of the polling code is not supposed to spin-lock, + // unlike the code above. Currently, it does not work. Once it starts + // working, we can replace the code above with a version of this code. 
// // while (true) // { // // Probe the tag recv event at the top // // of the worker's queue - // FLUX_LOG_INFO (dtl_handle->h, "Probe UCP worker with tag %lu\n", dtl_handle->comm_tag); - // msg = ucp_tag_probe_nb( + // FLUX_LOG_INFO (dtl_handle->h, "Probe UCP worker with tag %lu\n", + // dtl_handle->comm_tag); msg = ucp_tag_probe_nb( // dtl_handle->ucx_worker, // dtl_handle->comm_tag, // DYAD_UCX_TAG_MASK, @@ -499,38 +494,41 @@ dyad_rc_t dyad_dtl_ucx_recv(dyad_dtl_t *self, void **buf, size_t *buflen) // // break the loop // if (msg != NULL) // { - // FLUX_LOG_INFO (dtl_handle->h, "Data has arrived, so end polling\n"); - // break; + // FLUX_LOG_INFO (dtl_handle->h, "Data has arrived, so end + // polling\n"); break; // } // // If data has not arrived, check if there are // // any other events in the worker's queue. // // If so, start the loop over to handle the next event // else if (ucp_worker_progress(dtl_handle->ucx_worker)) // { - // FLUX_LOG_INFO (dtl_handle->h, "Progressed UCP worker to check if any other UCP events are available\n"); - // continue; + // FLUX_LOG_INFO (dtl_handle->h, "Progressed UCP worker to check if + // any other UCP events are available\n"); continue; // } // // No other events are queued. So, we will wait on new // // events to come in. By using 'ucp_worker_wait' for this, // // we let the OS do other work in the meantime (no spin locking). 
- // FLUX_LOG_INFO (dtl_handle->h, "Launch pre-emptable wait until UCP worker gets new events\n"); - // status = ucp_worker_wait(dtl_handle->ucx_worker); + // FLUX_LOG_INFO (dtl_handle->h, "Launch pre-emptable wait until UCP + // worker gets new events\n"); status = + // ucp_worker_wait(dtl_handle->ucx_worker); // // If the wait fails, log an error // if (UCX_STATUS_FAIL(status)) // { - // FLUX_LOG_ERR (dtl_handle->h, "Could not wait on the message from the producer plugin\n"); - // return DYAD_RC_UCXWAIT_FAIL; + // FLUX_LOG_ERR (dtl_handle->h, "Could not wait on the message from + // the producer plugin\n"); return DYAD_RC_UCXWAIT_FAIL; // } // } // The metadata retrived from the probed tag recv event contains // the size of the data to be sent. // So, use that size to allocate a buffer - FLUX_LOG_INFO (dtl_handle->h, "Got message with tag %lu and size %lu\n", msg_info.sender_tag, msg_info.length); + FLUX_LOG_INFO (dtl_handle->h, + "Got message with tag %lu and size %lu\n", + msg_info.sender_tag, + msg_info.length); *buflen = msg_info.length; - *buf = malloc(*buflen); + *buf = malloc (*buflen); // If allocation fails, log an error - if (*buf == NULL) - { + if (*buf == NULL) { FLUX_LOG_ERR (dtl_handle->h, "Could not allocate memory for file\n"); return DYAD_RC_SYSFAIL; } @@ -545,39 +543,31 @@ dyad_rc_t dyad_dtl_ucx_recv(dyad_dtl_t *self, void **buf, size_t *buflen) ucp_request_param_t recv_params; // TODO consider enabling UCP_OP_ATTR_FIELD_MEMH to speedup // the recv operation if using RMA behind the scenes - recv_params.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK | - UCP_OP_ATTR_FIELD_MEMORY_TYPE; + recv_params.op_attr_mask = + UCP_OP_ATTR_FIELD_CALLBACK | UCP_OP_ATTR_FIELD_MEMORY_TYPE; recv_params.cb.recv = dyad_recv_callback; // Constraining to Host memory (as opposed to GPU memory) // allows UCX to potentially perform some optimizations recv_params.memory_type = UCS_MEMORY_TYPE_HOST; // Perform the async recv operation using the probed tag recv event - req = 
ucp_tag_msg_recv_nbx( - dtl_handle->ucx_worker, - *buf, - *buflen, - msg, - &recv_params - ); + req = + ucp_tag_msg_recv_nbx (dtl_handle->ucx_worker, *buf, *buflen, msg, &recv_params); #else - req = ucp_tag_msg_recv_nb( - dtl_handle->ucx_worker, - *buf, - *buflen, - UCP_DATATYPE_CONTIG, - msg, - dyad_recv_callback - ); + req = ucp_tag_msg_recv_nb (dtl_handle->ucx_worker, + *buf, + *buflen, + UCP_DATATYPE_CONTIG, + msg, + dyad_recv_callback); #endif // Wait on the recv operation to complete FLUX_LOG_INFO (dtl_handle->h, "Wait for UCP recv operation to complete\n"); - status = dyad_ucx_request_wait(dtl_handle, req); + status = dyad_ucx_request_wait (dtl_handle, req); // If the recv operation failed, log an error, free the data buffer, // and set the buffer pointer to NULL - if (UCX_STATUS_FAIL(status)) - { + if (UCX_STATUS_FAIL (status)) { FLUX_LOG_ERR (dtl_handle->h, "UCX recv failed!\n"); - free(*buf); + free (*buf); *buf = NULL; return DYAD_RC_UCXCOMM_FAIL; } @@ -586,64 +576,63 @@ dyad_rc_t dyad_dtl_ucx_recv(dyad_dtl_t *self, void **buf, size_t *buflen) return DYAD_RC_OK; } -dyad_rc_t dyad_dtl_ucx_close_connection(dyad_dtl_t *self) +dyad_rc_t dyad_dtl_ucx_close_connection (dyad_dtl_t* self) { ucs_status_t status = UCS_OK; ucs_status_ptr_t stat_ptr; dyad_dtl_ucx_t* dtl_handle = self->private.ucx_dtl_handle; if (dtl_handle->curr_comm_mode == DYAD_COMM_SEND) { - if (dtl_handle != NULL) - { - if (dtl_handle->ep != NULL) - { - // ucp_tag_send_sync_nbx is the prefered version of this send since UCX 1.9 - // However, some systems (e.g., Lassen) may have an older verison - // This conditional compilation will use ucp_tag_send_sync_nbx if using UCX 1.9+, - // and it will use the deprecated ucp_tag_send_sync_nb if using UCX < 1.9. 
+ if (dtl_handle != NULL) { + if (dtl_handle->ep != NULL) { + // ucp_tag_send_sync_nbx is the prefered version of this send + // since UCX 1.9 However, some systems (e.g., Lassen) may have + // an older verison This conditional compilation will use + // ucp_tag_send_sync_nbx if using UCX 1.9+, and it will use the + // deprecated ucp_tag_send_sync_nb if using UCX < 1.9. FLUX_LOG_INFO (dtl_handle->h, "Start async closing of UCP endpoint\n"); #if UCP_API_VERSION >= UCP_VERSION(1, 10) ucp_request_param_t close_params; close_params.op_attr_mask = UCP_OP_ATTR_FIELD_FLAGS; close_params.flags = UCP_EP_CLOSE_FLAG_FORCE; - stat_ptr = ucp_ep_close_nbx(dtl_handle->ep, &close_params); + stat_ptr = ucp_ep_close_nbx (dtl_handle->ep, &close_params); #else - // TODO change to FORCE if we decide to enable err handleing mode - stat_ptr = ucp_ep_close_nb(dtl_handle->ep, UCP_EP_CLOSE_MODE_FORCE); + // TODO change to FORCE if we decide to enable err handleing + // mode + stat_ptr = ucp_ep_close_nb (dtl_handle->ep, UCP_EP_CLOSE_MODE_FORCE); #endif FLUX_LOG_INFO (dtl_handle->h, "Wait for endpoint closing to finish\n"); // Don't use dyad_ucx_request_wait here because ep_close behaves // differently than other UCX calls - if (stat_ptr != NULL) - { + if (stat_ptr != NULL) { // Endpoint close is in-progress. 
// Wait until finished - if (UCS_PTR_IS_PTR(stat_ptr)) - { + if (UCS_PTR_IS_PTR (stat_ptr)) { do { - ucp_worker_progress(dtl_handle->ucx_worker); - status = ucp_request_check_status(stat_ptr); + ucp_worker_progress (dtl_handle->ucx_worker); + status = ucp_request_check_status (stat_ptr); } while (status == UCS_INPROGRESS); - ucp_request_free(stat_ptr); + ucp_request_free (stat_ptr); } // An error occurred during endpoint closure // However, the endpoint can no longer be used // Get the status code for reporting - else - { - status = UCS_PTR_STATUS(stat_ptr); + else { + status = UCS_PTR_STATUS (stat_ptr); } - if (status != UCS_OK) - { - FLUX_LOG_ERR(dtl_handle->h, "Could not successfully close Endpoint (status = %d)! However, endpoint was released.\n", status); + if (status != UCS_OK) { + FLUX_LOG_ERR (dtl_handle->h, + "Could not successfully close Endpoint " + "(status = %d)! However, endpoint was " + "released.\n", + status); } } dtl_handle->ep = NULL; } // Sender doesn't have a consumer address at this time // So, free the consumer address when closing the connection - if (dtl_handle->consumer_address != NULL) - { - free(dtl_handle->consumer_address); + if (dtl_handle->consumer_address != NULL) { + free (dtl_handle->consumer_address); dtl_handle->consumer_address = NULL; dtl_handle->addr_len = 0; } @@ -660,18 +649,19 @@ dyad_rc_t dyad_dtl_ucx_close_connection(dyad_dtl_t *self) dtl_handle->comm_tag = 0; return DYAD_RC_OK; } else { - FLUX_LOG_ERR (dtl_handle->h, "Somehow, an invalid comm mode reached 'close_connection'\n"); + FLUX_LOG_ERR (dtl_handle->h, + "Somehow, an invalid comm mode reached " + "'close_connection'\n"); // TODO create new RC for this case return -1; } } -dyad_rc_t dyad_dtl_ucx_finalize(dyad_dtl_t **self) +dyad_rc_t dyad_dtl_ucx_finalize (dyad_dtl_t** self) { dyad_dtl_ucx_t* dtl_handle = NULL; dyad_rc_t rc = DYAD_RC_OK; - if (self == NULL || *self == NULL || - (*self)->private.ucx_dtl_handle == NULL) { + if (self == NULL || *self == NULL || 
(*self)->private.ucx_dtl_handle == NULL) { return DYAD_RC_OK; } dtl_handle = (*self)->private.ucx_dtl_handle; @@ -682,27 +672,25 @@ dyad_rc_t dyad_dtl_ucx_finalize(dyad_dtl_t **self) } // Release consumer address if not already released if (dtl_handle->consumer_address != NULL) { - ucp_worker_release_address( - dtl_handle->ucx_worker, - dtl_handle->consumer_address - ); + ucp_worker_release_address (dtl_handle->ucx_worker, + dtl_handle->consumer_address); dtl_handle->consumer_address = NULL; } // Release worker if not already released if (dtl_handle->ucx_worker != NULL) { - ucp_worker_destroy(dtl_handle->ucx_worker); + ucp_worker_destroy (dtl_handle->ucx_worker); dtl_handle->ucx_worker = NULL; } // Release context if not already released if (dtl_handle->ucx_ctx != NULL) { - ucp_cleanup(dtl_handle->ucx_ctx); + ucp_cleanup (dtl_handle->ucx_ctx); dtl_handle->ucx_ctx = NULL; } // Flux handle should be released by the // DYAD context, so it is not released here dtl_handle->h = NULL; // Free the handle and set to NULL to prevent double free - free(dtl_handle); + free (dtl_handle); (*self)->private.ucx_dtl_handle = NULL; return DYAD_RC_OK; } diff --git a/src/dtl/ucx_dtl.h b/src/dtl/ucx_dtl.h index 658db384..90d79437 100644 --- a/src/dtl/ucx_dtl.h +++ b/src/dtl/ucx_dtl.h @@ -1,41 +1,45 @@ #ifndef DYAD_DTL_UCX_H #define DYAD_DTL_UCX_H -#include "dyad_dtl_impl.h" - +#include #include -#include +#include "dyad_dtl_impl.h" struct dyad_dtl_ucx { - flux_t *h; + flux_t* h; bool debug; ucp_context_h ucx_ctx; ucp_worker_h ucx_worker; ucp_ep_h ep; dyad_dtl_comm_mode_t curr_comm_mode; - ucp_address_t *consumer_address; + ucp_address_t* consumer_address; size_t addr_len; ucp_tag_t comm_tag; }; typedef struct dyad_dtl_ucx dyad_dtl_ucx_t; -dyad_rc_t dyad_dtl_ucx_init (dyad_dtl_t *self, - dyad_dtl_mode_t mode, flux_t *h, bool debug); +dyad_rc_t dyad_dtl_ucx_init (dyad_dtl_t* self, + dyad_dtl_mode_t mode, + flux_t* h, + bool debug); -dyad_rc_t dyad_dtl_ucx_rpc_pack (dyad_dtl_t* restrict 
self, const char* restrict upath, - uint32_t producer_rank, json_t** restrict packed_obj); +dyad_rc_t dyad_dtl_ucx_rpc_pack (dyad_dtl_t* restrict self, + const char* restrict upath, + uint32_t producer_rank, + json_t** restrict packed_obj); dyad_rc_t dyad_dtl_ucx_rpc_unpack (dyad_dtl_t* self, - const flux_msg_t* msg, char** upath); + const flux_msg_t* msg, + char** upath); dyad_rc_t dyad_dtl_ucx_rpc_respond (dyad_dtl_t* self, const flux_msg_t* orig_msg); dyad_rc_t dyad_dtl_ucx_rpc_recv_response (dyad_dtl_t* self, flux_future_t* f); -dyad_rc_t dyad_dtl_ucx_establish_connection (dyad_dtl_t *self, - dyad_dtl_comm_mode_t comm_mode); +dyad_rc_t dyad_dtl_ucx_establish_connection (dyad_dtl_t* self, + dyad_dtl_comm_mode_t comm_mode); dyad_rc_t dyad_dtl_ucx_send (dyad_dtl_t* self, void* buf, size_t buflen); @@ -43,6 +47,6 @@ dyad_rc_t dyad_dtl_ucx_recv (dyad_dtl_t* self, void** buf, size_t* buflen); dyad_rc_t dyad_dtl_ucx_close_connection (dyad_dtl_t* self); -dyad_rc_t dyad_dtl_ucx_finalize (dyad_dtl_t **self); +dyad_rc_t dyad_dtl_ucx_finalize (dyad_dtl_t** self); #endif /* DYAD_DTL_UCX_H */ \ No newline at end of file diff --git a/src/modules/dyad.c b/src/modules/dyad.c index 5a8db126..38c25a99 100644 --- a/src/modules/dyad.c +++ b/src/modules/dyad.c @@ -17,12 +17,12 @@ #include #else #include +#include #include #include #include #include #include -#include #endif // defined(__cplusplus) #include @@ -30,10 +30,10 @@ #include #include +#include "dyad_dtl_impl.h" #include "dyad_rc.h" #include "read_all.h" #include "utils.h" -#include "dyad_dtl_impl.h" #define TIME_DIFF(Tstart, Tend) \ ((double)(1000000000L * ((Tend).tv_sec - (Tstart).tv_sec) + (Tend).tv_nsec \ @@ -48,13 +48,7 @@ struct dyad_mod_ctx { dyad_dtl_t *dtl_handle; }; -const struct dyad_mod_ctx dyad_mod_ctx_default = { - NULL, - false, - NULL, - NULL, - NULL -}; +const struct dyad_mod_ctx dyad_mod_ctx_default = {NULL, false, NULL, NULL, NULL}; typedef struct dyad_mod_ctx dyad_mod_ctx_t; @@ -84,7 +78,7 @@ static 
dyad_mod_ctx_t *getctx (flux_t *h) dyad_mod_ctx_t *ctx = (dyad_mod_ctx_t *)flux_aux_get (h, "dyad"); if (!ctx) { - ctx = (dyad_mod_ctx_t *) malloc (sizeof (*ctx)); + ctx = (dyad_mod_ctx_t *)malloc (sizeof (*ctx)); if (ctx == NULL) { FLUX_LOG_ERR (h, "DYAD_MOD: could not allocate memory for context"); goto getctx_error; @@ -113,10 +107,11 @@ getctx_error:; #if DYAD_PERFFLOW __attribute__ ((annotate ("@critical_path()"))) #endif -static void dyad_fetch_request_cb (flux_t *h, - flux_msg_handler_t *w, - const flux_msg_t *msg, - void *arg) +static void +dyad_fetch_request_cb (flux_t *h, + flux_msg_handler_t *w, + const flux_msg_t *msg, + void *arg) { FLUX_LOG_INFO (h, "Launched callback for dyad.fetch\n"); dyad_mod_ctx_t *ctx = getctx (h); @@ -138,14 +133,10 @@ static void dyad_fetch_request_cb (flux_t *h, goto fetch_error; FLUX_LOG_INFO (h, "DYAD_MOD: unpacking RPC message"); - - rc = ctx->dtl_handle->rpc_unpack ( - ctx->dtl_handle, - msg, - &upath - ); - - if (DYAD_IS_ERROR(rc)) { + + rc = ctx->dtl_handle->rpc_unpack (ctx->dtl_handle, msg, &upath); + + if (DYAD_IS_ERROR (rc)) { FLUX_LOG_ERR (ctx->h, "Could not unpack message from client\n"); errno = EPROTO; goto fetch_error; @@ -154,11 +145,8 @@ static void dyad_fetch_request_cb (flux_t *h, FLUX_LOG_INFO (h, "DYAD_MOD: requested user_path: %s", upath); FLUX_LOG_INFO (h, "DYAD_MOD: sending initial response to consumer"); - rc = ctx->dtl_handle->rpc_respond ( - ctx->dtl_handle, - msg - ); - if (DYAD_IS_ERROR(rc)) { + rc = ctx->dtl_handle->rpc_respond (ctx->dtl_handle, msg); + if (DYAD_IS_ERROR (rc)) { FLUX_LOG_ERR (ctx->h, "Could not send primary RPC response to client\n"); goto fetch_error; } @@ -168,8 +156,7 @@ static void dyad_fetch_request_cb (flux_t *h, #if DYAD_SPIN_WAIT if (!get_stat (fullpath, 1000U, 1000L)) { - FLUX_LOG_ERR (h, "DYAD_MOD: Failed to access info on \"%s\".\n", - fullpath); + FLUX_LOG_ERR (h, "DYAD_MOD: Failed to access info on \"%s\".\n", fullpath); // goto error; } #endif // DYAD_SPIN_WAIT @@ 
-185,28 +172,21 @@ static void dyad_fetch_request_cb (flux_t *h, goto fetch_error; } close (fd); - FLUX_LOG_INFO (h, "Is inbuf NULL? -> %i\n", (int) (inbuf == NULL)); + FLUX_LOG_INFO (h, "Is inbuf NULL? -> %i\n", (int)(inbuf == NULL)); FLUX_LOG_INFO (h, "Establish DTL connection with consumer"); - rc = ctx->dtl_handle->establish_connection ( - ctx->dtl_handle, - DYAD_COMM_SEND - ); - if (DYAD_IS_ERROR(rc)) { + rc = ctx->dtl_handle->establish_connection (ctx->dtl_handle, DYAD_COMM_SEND); + if (DYAD_IS_ERROR (rc)) { FLUX_LOG_ERR (ctx->h, "Could not establish DTL connection with client\n"); errno = ECONNREFUSED; goto fetch_error; } FLUX_LOG_INFO (h, "Send file to consumer with DTL"); - rc = ctx->dtl_handle->send ( - ctx->dtl_handle, - inbuf, - inlen - ); + rc = ctx->dtl_handle->send (ctx->dtl_handle, inbuf, inlen); FLUX_LOG_INFO (h, "Close DTL connection with consumer"); ctx->dtl_handle->close_connection (ctx->dtl_handle); - free(inbuf); - if (DYAD_IS_ERROR(rc)) { + free (inbuf); + if (DYAD_IS_ERROR (rc)) { FLUX_LOG_ERR (ctx->h, "Could not send data to client via DTL\n"); errno = ECOMM; goto fetch_error; @@ -214,7 +194,9 @@ static void dyad_fetch_request_cb (flux_t *h, FLUX_LOG_INFO (h, "Close RPC message stream with an ENODATA message"); if (flux_respond_error (h, msg, ENODATA, NULL) < 0) { - FLUX_LOG_ERR (h, "DYAD_MOD: %s: flux_respond_error with ENODATA failed\n", __FUNCTION__); + FLUX_LOG_ERR (h, + "DYAD_MOD: %s: flux_respond_error with ENODATA failed\n", + __FUNCTION__); } errno = saved_errno; FLUX_LOG_INFO (h, "Finished dyad.fetch module invocation\n"); @@ -236,33 +218,35 @@ static dyad_rc_t dyad_open (flux_t *h, dyad_dtl_mode_t dtl_mode, bool debug) char *e = NULL; ctx->debug = debug; - rc = dyad_dtl_init ( - &(ctx->dtl_handle), - dtl_mode, - h, - ctx->debug - ); + rc = dyad_dtl_init (&(ctx->dtl_handle), dtl_mode, h, ctx->debug); return rc; } -static const struct flux_msg_handler_spec htab[] = {{FLUX_MSGTYPE_REQUEST, - "dyad.fetch", - dyad_fetch_request_cb, 0}, 
- FLUX_MSGHANDLER_TABLE_END}; +static const struct flux_msg_handler_spec htab[] = + {{FLUX_MSGTYPE_REQUEST, "dyad.fetch", dyad_fetch_request_cb, 0}, + FLUX_MSGHANDLER_TABLE_END}; -void usage() +void usage () { - fprintf(stderr, "Usage: flux exec -r all flux module load dyad.so [DTL_MODE] [--debug | -d]\n\n"); - fprintf(stderr, "Required Arguments:\n"); - fprintf(stderr, "===================\n"); - fprintf(stderr, " * PRODUCER_PATH: the producer-managed path that the module should track\n\n"); - fprintf(stderr, "Optional Arguments:\n"); - fprintf(stderr, "===================\n"); - fprintf(stderr, " * DTL_MODE: a valid data transport layer (DTL) mode. Can be one of the following values\n"); - fprintf(stderr, " * UCX (default): use UCX to send data to consumer\n"); - fprintf(stderr, " * FLUX_RPC: use Flux's RPC response mechanism to send data to consumer\n"); - fprintf(stderr, " * --debug | -d: if provided, add debugging log messages\n"); + fprintf (stderr, + "Usage: flux exec -r all flux module load dyad.so " + "[DTL_MODE] [--debug | -d]\n\n"); + fprintf (stderr, "Required Arguments:\n"); + fprintf (stderr, "===================\n"); + fprintf (stderr, + " * PRODUCER_PATH: the producer-managed path that the module should " + "track\n\n"); + fprintf (stderr, "Optional Arguments:\n"); + fprintf (stderr, "===================\n"); + fprintf (stderr, + " * DTL_MODE: a valid data transport layer (DTL) mode. Can be one of the " + "following values\n"); + fprintf (stderr, " * UCX (default): use UCX to send data to consumer\n"); + fprintf (stderr, + " * FLUX_RPC: use Flux's RPC response mechanism to send data to " + "consumer\n"); + fprintf (stderr, " * --debug | -d: if provided, add debugging log messages\n"); } DYAD_DLL_EXPORTED int mod_main (flux_t *h, int argc, char **argv) @@ -284,7 +268,7 @@ DYAD_DLL_EXPORTED int mod_main (flux_t *h, int argc, char **argv) FLUX_LOG_ERR (ctx->h, "DYAD_MOD: Missing argument(s). 
" "Requires a local dyad path.\n"); - usage(); + usage (); goto mod_error; } (ctx->dyad_path) = argv[0]; @@ -292,21 +276,22 @@ DYAD_DLL_EXPORTED int mod_main (flux_t *h, int argc, char **argv) if (argc >= 2) { FLUX_LOG_INFO (h, "DTL Mode (from cmd line) is %s\n", argv[1]); - flag_len = strlen(argv[1]); + flag_len = strlen (argv[1]); if (strncmp (argv[1], "FLUX_RPC", flag_len) == 0) { dtl_mode = DYAD_DTL_FLUX_RPC; } else if (strncmp (argv[1], "UCX", flag_len) == 0) { dtl_mode = DYAD_DTL_UCX; } else { FLUX_LOG_ERR (ctx->h, "Invalid DTL mode provided\n"); - usage(); + usage (); goto mod_error; } } if (argc >= 3) { flag_len = strlen (argv[2]); - if (strncmp (argv[2], "--debug", flag_len) == 0 || strncmp (argv[2], "-d", flag_len) == 0) { + if (strncmp (argv[2], "--debug", flag_len) == 0 + || strncmp (argv[2], "-d", flag_len) == 0) { debug = true; } else { debug = false; @@ -321,8 +306,7 @@ DYAD_DLL_EXPORTED int mod_main (flux_t *h, int argc, char **argv) FLUX_LOG_INFO (ctx->h, "dyad module begins using \"%s\"\n", argv[0]); if (flux_msg_handler_addvec (ctx->h, htab, (void *)h, &ctx->handlers) < 0) { - FLUX_LOG_ERR (ctx->h, "flux_msg_handler_addvec: %s\n", - strerror (errno)); + FLUX_LOG_ERR (ctx->h, "flux_msg_handler_addvec: %s\n", strerror (errno)); goto mod_error; } diff --git a/src/modules/urpc.c b/src/modules/urpc.c index f51d5e81..19847552 100644 --- a/src/modules/urpc.c +++ b/src/modules/urpc.c @@ -106,8 +106,7 @@ urpc_exec_cmd (flux_t *h, const char *cmd, void **inbuf, ssize_t *inlen) fd = fileno (fp); if ((*inlen = read_all (fd, inbuf)) < 0) { - FLUX_LOG_ERR (h, "URPC_MOD: Failed to catch result from \"%s\".\n", - cmd); + FLUX_LOG_ERR (h, "URPC_MOD: Failed to catch result from \"%s\".\n", cmd); goto error; } @@ -223,14 +222,25 @@ urpc_execj_request_cb (flux_t *h, } if (!(jcmd = json_loads (cmd_json, 0, &error))) { - FLUX_LOG_ERR (h, "URPC_MOD: json error on line %d: %s\n", error.line, + FLUX_LOG_ERR (h, + "URPC_MOD: json error on line %d: %s\n", + error.line, 
error.text); goto error; } // TODO: generalize the format string. - rc = json_unpack (jcmd, "{s:s, s:[s, {s:s, s:s}, i]}", "cmd", &exec, "args", - &arg1, "file", &filename, "content", &content, &n); + rc = json_unpack (jcmd, + "{s:s, s:[s, {s:s, s:s}, i]}", + "cmd", + &exec, + "args", + &arg1, + "file", + &filename, + "content", + &content, + &n); if (rc) { FLUX_LOG_ERR (h, "URPC_MOD: could not unpack '%s'.\n", cmd_json); @@ -314,8 +324,7 @@ int mod_main (flux_t *h, int argc, char **argv) FLUX_LOG_INFO (ctx->h, "urpc module begins using \"%s\"\n", argv[0]); if (flux_msg_handler_addvec (ctx->h, htab, (void *)h, &ctx->handlers) < 0) { - FLUX_LOG_ERR (ctx->h, "flux_msg_handler_addvec: %s\n", - strerror (errno)); + FLUX_LOG_ERR (ctx->h, "flux_msg_handler_addvec: %s\n", strerror (errno)); goto error; } diff --git a/src/stream/dyad_stream_api.hpp b/src/stream/dyad_stream_api.hpp index 09a6880b..9cec061b 100644 --- a/src/stream/dyad_stream_api.hpp +++ b/src/stream/dyad_stream_api.hpp @@ -12,7 +12,7 @@ #define DYAD_STREAM_DYAD_STREAM_API_HPP #include // fsync -#include // realpath +#include // realpath #include #include #include @@ -43,8 +43,7 @@ template ().make_preferred ().filename ())> -using dyad_if_fs_path = - std::enable_if_t, _Result>; +using dyad_if_fs_path = std::enable_if_t, _Result>; #endif // c++17 filesystem // https://stackoverflow.com/questions/676787/how-to-do-fsync-on-an-ofstream @@ -149,11 +148,9 @@ class basic_ifstream_dyad using ifstream_dyad = basic_ifstream_dyad; using wifstream_dyad = basic_ifstream_dyad; -#if __cplusplus \ - < 201103L //---------------------------------------------------- +#if __cplusplus < 201103L //---------------------------------------------------- template -basic_ifstream_dyad<_CharT, _Traits>::basic_ifstream_dyad ( - const dyad_stream_core& core) +basic_ifstream_dyad<_CharT, _Traits>::basic_ifstream_dyad (const dyad_stream_core& core) : m_core (core) { m_stream = new basic_ifstream (); @@ -167,9 +164,8 @@ 
basic_ifstream_dyad<_CharT, _Traits>::basic_ifstream_dyad () } template -basic_ifstream_dyad<_CharT, _Traits>::basic_ifstream_dyad ( - const char* filename, - std::ios_base::openmode mode) +basic_ifstream_dyad<_CharT, _Traits>::basic_ifstream_dyad (const char* filename, + std::ios_base::openmode mode) { m_core.init (); m_core.open_sync (filename); @@ -186,8 +182,7 @@ bool basic_ifstream_dyad<_CharT, _Traits>::is_open () } #else //----------------------------------------------------------------------- template -basic_ifstream_dyad<_CharT, _Traits>::basic_ifstream_dyad ( - const dyad_stream_core& core) +basic_ifstream_dyad<_CharT, _Traits>::basic_ifstream_dyad (const dyad_stream_core& core) : m_core (core) { m_stream = std::unique_ptr (new basic_ifstream ()); @@ -201,36 +196,31 @@ basic_ifstream_dyad<_CharT, _Traits>::basic_ifstream_dyad () } template -basic_ifstream_dyad<_CharT, _Traits>::basic_ifstream_dyad ( - const char* filename, - std::ios_base::openmode mode) +basic_ifstream_dyad<_CharT, _Traits>::basic_ifstream_dyad (const char* filename, + std::ios_base::openmode mode) { m_core.init (); m_core.open_sync (filename); - m_stream = - std::unique_ptr (new basic_ifstream (filename, mode)); + m_stream = std::unique_ptr (new basic_ifstream (filename, mode)); if ((m_stream != nullptr) && (*m_stream)) { m_filename = std::string{filename}; } } template -basic_ifstream_dyad<_CharT, _Traits>::basic_ifstream_dyad ( - const string& filename, - std::ios_base::openmode mode) +basic_ifstream_dyad<_CharT, _Traits>::basic_ifstream_dyad (const string& filename, + std::ios_base::openmode mode) { m_core.init (); m_core.open_sync (filename.c_str ()); - m_stream = - std::unique_ptr (new basic_ifstream (filename, mode)); + m_stream = std::unique_ptr (new basic_ifstream (filename, mode)); if ((m_stream != nullptr) && (*m_stream)) { m_filename = filename; } } template -basic_ifstream_dyad<_CharT, _Traits>::basic_ifstream_dyad ( - basic_ifstream_dyad&& rhs) +basic_ifstream_dyad<_CharT, 
_Traits>::basic_ifstream_dyad (basic_ifstream_dyad&& rhs) : m_stream (std::move (rhs.m_stream)), m_core (std::move (rhs.m_core)) { } @@ -238,20 +228,19 @@ basic_ifstream_dyad<_CharT, _Traits>::basic_ifstream_dyad ( #if (__cplusplus >= 201703L) && __has_include() template template -basic_ifstream_dyad<_CharT, _Traits>::basic_ifstream_dyad ( - const _Path& filepath, - std::ios_base::openmode mode) +basic_ifstream_dyad<_CharT, _Traits>::basic_ifstream_dyad (const _Path& filepath, + std::ios_base::openmode mode) { m_core.init (); m_core.open_sync (filepath.c_str ()); - m_stream = std::unique_ptr ( - new basic_ifstream (filepath.c_str (), mode)); + m_stream = + std::unique_ptr (new basic_ifstream (filepath.c_str (), mode)); } #endif // c++17 filesystem template -basic_ifstream_dyad<_CharT, _Traits>& basic_ifstream_dyad<_CharT, _Traits>:: -operator= (basic_ifstream_dyad&& rhs) +basic_ifstream_dyad<_CharT, _Traits>& basic_ifstream_dyad<_CharT, _Traits>::operator= ( + basic_ifstream_dyad&& rhs) { m_stream = std::move (rhs.m_stream); m_core = std::move (rhs.m_core); @@ -330,8 +319,8 @@ std::filebuf* basic_ifstream_dyad<_CharT, _Traits>::rdbuf () const } template -std::basic_ifstream<_CharT, _Traits>& basic_ifstream_dyad<_CharT, _Traits>:: - get_stream () +std::basic_ifstream<_CharT, _Traits>& basic_ifstream_dyad<_CharT, + _Traits>::get_stream () { if (m_stream == nullptr) { // TODO: throw @@ -412,11 +401,9 @@ class basic_ofstream_dyad using ofstream_dyad = basic_ofstream_dyad; using wofstream_dyad = basic_ofstream_dyad; -#if __cplusplus \ - < 201103L //---------------------------------------------------- +#if __cplusplus < 201103L //---------------------------------------------------- template -basic_ofstream_dyad<_CharT, _Traits>::basic_ofstream_dyad ( - const dyad_stream_core& core) +basic_ofstream_dyad<_CharT, _Traits>::basic_ofstream_dyad (const dyad_stream_core& core) : m_core (core) { m_stream = new basic_ofstream (); @@ -430,9 +417,8 @@ basic_ofstream_dyad<_CharT, 
_Traits>::basic_ofstream_dyad () } template -basic_ofstream_dyad<_CharT, _Traits>::basic_ofstream_dyad ( - const char* filename, - std::ios_base::openmode mode) +basic_ofstream_dyad<_CharT, _Traits>::basic_ofstream_dyad (const char* filename, + std::ios_base::openmode mode) { m_core.init (); m_stream = new basic_ofstream (filename, mode); @@ -448,8 +434,7 @@ bool basic_ofstream_dyad<_CharT, _Traits>::is_open () } #else //----------------------------------------------------------------------- template -basic_ofstream_dyad<_CharT, _Traits>::basic_ofstream_dyad ( - const dyad_stream_core& core) +basic_ofstream_dyad<_CharT, _Traits>::basic_ofstream_dyad (const dyad_stream_core& core) : m_core (core) { m_stream = std::unique_ptr (new basic_ofstream ()); @@ -463,34 +448,29 @@ basic_ofstream_dyad<_CharT, _Traits>::basic_ofstream_dyad () } template -basic_ofstream_dyad<_CharT, _Traits>::basic_ofstream_dyad ( - const char* filename, - std::ios_base::openmode mode) +basic_ofstream_dyad<_CharT, _Traits>::basic_ofstream_dyad (const char* filename, + std::ios_base::openmode mode) { m_core.init (); - m_stream = - std::unique_ptr (new basic_ofstream (filename, mode)); + m_stream = std::unique_ptr (new basic_ofstream (filename, mode)); if ((m_stream != nullptr) && (*m_stream)) { m_filename = std::string{filename}; } } template -basic_ofstream_dyad<_CharT, _Traits>::basic_ofstream_dyad ( - const string& filename, - std::ios_base::openmode mode) +basic_ofstream_dyad<_CharT, _Traits>::basic_ofstream_dyad (const string& filename, + std::ios_base::openmode mode) { m_core.init (); - m_stream = - std::unique_ptr (new basic_ofstream (filename, mode)); + m_stream = std::unique_ptr (new basic_ofstream (filename, mode)); if ((m_stream != nullptr) && (*m_stream)) { m_filename = filename; } } template -basic_ofstream_dyad<_CharT, _Traits>::basic_ofstream_dyad ( - basic_ofstream_dyad&& rhs) +basic_ofstream_dyad<_CharT, _Traits>::basic_ofstream_dyad (basic_ofstream_dyad&& rhs) : m_stream 
(std::move (rhs.m_stream)), m_core (std::move (rhs.m_core)) { } @@ -498,19 +478,18 @@ basic_ofstream_dyad<_CharT, _Traits>::basic_ofstream_dyad ( #if (__cplusplus >= 201703L) && __has_include() template template -basic_ofstream_dyad<_CharT, _Traits>::basic_ofstream_dyad ( - const _Path& filepath, - std::ios_base::openmode mode) +basic_ofstream_dyad<_CharT, _Traits>::basic_ofstream_dyad (const _Path& filepath, + std::ios_base::openmode mode) { m_core.init (); - m_stream = std::unique_ptr ( - new basic_ofstream (filepath.c_str (), mode)); + m_stream = + std::unique_ptr (new basic_ofstream (filepath.c_str (), mode)); } #endif // c++17 filesystem template -basic_ofstream_dyad<_CharT, _Traits>& basic_ofstream_dyad<_CharT, _Traits>:: -operator= (basic_ofstream_dyad&& rhs) +basic_ofstream_dyad<_CharT, _Traits>& basic_ofstream_dyad<_CharT, _Traits>::operator= ( + basic_ofstream_dyad&& rhs) { m_stream = std::move (rhs.m_stream); m_core = std::move (rhs.m_core); @@ -601,8 +580,8 @@ std::filebuf* basic_ofstream_dyad<_CharT, _Traits>::rdbuf () const } template -std::basic_ofstream<_CharT, _Traits>& basic_ofstream_dyad<_CharT, _Traits>:: - get_stream () +std::basic_ofstream<_CharT, _Traits>& basic_ofstream_dyad<_CharT, + _Traits>::get_stream () { if (m_stream == nullptr) { // TODO: throw @@ -687,11 +666,9 @@ class basic_fstream_dyad using fstream_dyad = basic_fstream_dyad; using wfstream_dyad = basic_fstream_dyad; -#if __cplusplus \ - < 201103L //---------------------------------------------------- +#if __cplusplus < 201103L //---------------------------------------------------- template -basic_fstream_dyad<_CharT, _Traits>::basic_fstream_dyad ( - const dyad_stream_core& core) +basic_fstream_dyad<_CharT, _Traits>::basic_fstream_dyad (const dyad_stream_core& core) : m_core (core) { m_stream = new basic_fstream (); @@ -705,9 +682,8 @@ basic_fstream_dyad<_CharT, _Traits>::basic_fstream_dyad () } template -basic_fstream_dyad<_CharT, _Traits>::basic_fstream_dyad ( - const char* 
filename, - std::ios_base::openmode mode) +basic_fstream_dyad<_CharT, _Traits>::basic_fstream_dyad (const char* filename, + std::ios_base::openmode mode) { m_core.init (); m_core.open_sync (filename); @@ -724,8 +700,7 @@ bool basic_fstream_dyad<_CharT, _Traits>::is_open () } #else //----------------------------------------------------------------------- template -basic_fstream_dyad<_CharT, _Traits>::basic_fstream_dyad ( - const dyad_stream_core& core) +basic_fstream_dyad<_CharT, _Traits>::basic_fstream_dyad (const dyad_stream_core& core) : m_core (core) { m_stream = std::unique_ptr (new basic_fstream ()); @@ -739,36 +714,31 @@ basic_fstream_dyad<_CharT, _Traits>::basic_fstream_dyad () } template -basic_fstream_dyad<_CharT, _Traits>::basic_fstream_dyad ( - const char* filename, - std::ios_base::openmode mode) +basic_fstream_dyad<_CharT, _Traits>::basic_fstream_dyad (const char* filename, + std::ios_base::openmode mode) { m_core.init (); m_core.open_sync (filename); - m_stream = - std::unique_ptr (new basic_fstream (filename, mode)); + m_stream = std::unique_ptr (new basic_fstream (filename, mode)); if ((m_stream != nullptr) && (*m_stream)) { m_filename = std::string{filename}; } } template -basic_fstream_dyad<_CharT, _Traits>::basic_fstream_dyad ( - const string& filename, - std::ios_base::openmode mode) +basic_fstream_dyad<_CharT, _Traits>::basic_fstream_dyad (const string& filename, + std::ios_base::openmode mode) { m_core.init (); m_core.open_sync (filename.c_str ()); - m_stream = - std::unique_ptr (new basic_fstream (filename, mode)); + m_stream = std::unique_ptr (new basic_fstream (filename, mode)); if ((m_stream != nullptr) && (*m_stream)) { m_filename = filename; } } template -basic_fstream_dyad<_CharT, _Traits>::basic_fstream_dyad ( - basic_fstream_dyad&& rhs) +basic_fstream_dyad<_CharT, _Traits>::basic_fstream_dyad (basic_fstream_dyad&& rhs) : m_stream (std::move (rhs.m_stream)), m_core (std::move (rhs.m_core)) { } @@ -776,20 +746,19 @@ 
basic_fstream_dyad<_CharT, _Traits>::basic_fstream_dyad ( #if (__cplusplus >= 201703L) && __has_include() template template -basic_fstream_dyad<_CharT, _Traits>::basic_fstream_dyad ( - const _Path& filepath, - std::ios_base::openmode mode) +basic_fstream_dyad<_CharT, _Traits>::basic_fstream_dyad (const _Path& filepath, + std::ios_base::openmode mode) { m_core.init (); m_core.open_sync (filepath.c_str ()); - m_stream = std::unique_ptr ( - new basic_fstream (filepath.c_str (), mode)); + m_stream = + std::unique_ptr (new basic_fstream (filepath.c_str (), mode)); } #endif // c++17 filesystem template -basic_fstream_dyad<_CharT, _Traits>& basic_fstream_dyad<_CharT, _Traits>:: -operator= (basic_fstream_dyad&& rhs) +basic_fstream_dyad<_CharT, _Traits>& basic_fstream_dyad<_CharT, _Traits>::operator= ( + basic_fstream_dyad&& rhs) { m_stream = std::move (rhs.m_stream); m_core = std::move (rhs.m_core); @@ -881,8 +850,7 @@ std::filebuf* basic_fstream_dyad<_CharT, _Traits>::rdbuf () const } template -std::basic_fstream<_CharT, _Traits>& basic_fstream_dyad<_CharT, - _Traits>::get_stream () +std::basic_fstream<_CharT, _Traits>& basic_fstream_dyad<_CharT, _Traits>::get_stream () { if (m_stream == nullptr) { // TODO: throw diff --git a/src/stream/dyad_stream_core.cpp b/src/stream/dyad_stream_core.cpp index 9819bde2..9ccabce9 100644 --- a/src/stream/dyad_stream_core.cpp +++ b/src/stream/dyad_stream_core.cpp @@ -8,8 +8,9 @@ * SPDX-License-Identifier: LGPL-3.0 \************************************************************/ -#include "dyad_core.h" #include "dyad_stream_core.hpp" + +#include "dyad_core.h" #include "murmur3.h" #include "utils.h" @@ -26,7 +27,7 @@ #include #include using namespace std; // std::clock () -//#include // c++11 +// #include // c++11 #include #include @@ -79,8 +80,7 @@ void dyad_stream_core::init () m_is_prod = false; } - dyad_rc_t rc = - dyad_init_env (&m_ctx); + dyad_rc_t rc = dyad_init_env (&m_ctx); // TODO figure out if we want to error if init fails 
m_initialized = true; @@ -90,13 +90,16 @@ void dyad_stream_core::init () void dyad_stream_core::init (const dyad_params &p) { DPRINTF (m_ctx, "DYAD_WRAPPER: Initializeing DYAD wrapper\n"); - dyad_rc_t rc = - dyad_init (p.m_debug, false, p.m_shared_storage, p.m_key_depth, - p.m_key_bins, p.m_kvs_namespace.c_str (), - p.m_prod_managed_path.c_str (), - p.m_cons_managed_path.c_str (), - static_cast(p.m_dtl_mode), - &m_ctx); + dyad_rc_t rc = dyad_init (p.m_debug, + false, + p.m_shared_storage, + p.m_key_depth, + p.m_key_bins, + p.m_kvs_namespace.c_str (), + p.m_prod_managed_path.c_str (), + p.m_cons_managed_path.c_str (), + static_cast (p.m_dtl_mode), + &m_ctx); // TODO figure out if we want to error if init fails m_initialized = true; log_info ("Stream core is initialized by parameters"); @@ -105,18 +108,19 @@ void dyad_stream_core::init (const dyad_params &p) void dyad_stream_core::log_info (const std::string &msg_head) const { DYAD_LOG_INFO (m_ctx, "=== %s ===\n", msg_head.c_str ()); - DYAD_LOG_INFO (m_ctx, "%s=%s\n", DYAD_PATH_CONSUMER_ENV, - m_ctx->cons_managed_path); - DYAD_LOG_INFO (m_ctx, "%s=%s\n", DYAD_PATH_PRODUCER_ENV, - m_ctx->prod_managed_path); - DYAD_LOG_INFO (m_ctx, "%s=%s\n", DYAD_SYNC_DEBUG_ENV, + DYAD_LOG_INFO (m_ctx, "%s=%s\n", DYAD_PATH_CONSUMER_ENV, m_ctx->cons_managed_path); + DYAD_LOG_INFO (m_ctx, "%s=%s\n", DYAD_PATH_PRODUCER_ENV, m_ctx->prod_managed_path); + DYAD_LOG_INFO (m_ctx, + "%s=%s\n", + DYAD_SYNC_DEBUG_ENV, (m_ctx->debug) ? "true" : "false"); - DYAD_LOG_INFO (m_ctx, "%s=%s\n", DYAD_SHARED_STORAGE_ENV, + DYAD_LOG_INFO (m_ctx, + "%s=%s\n", + DYAD_SHARED_STORAGE_ENV, (m_ctx->shared_storage) ? 
"true" : "false"); DYAD_LOG_INFO (m_ctx, "%s=%u\n", DYAD_KEY_DEPTH_ENV, m_ctx->key_depth); DYAD_LOG_INFO (m_ctx, "%s=%u\n", DYAD_KEY_BINS_ENV, m_ctx->key_bins); - DYAD_LOG_INFO (m_ctx, "%s=%s\n", DYAD_KVS_NAMESPACE_ENV, - m_ctx->kvs_namespace); + DYAD_LOG_INFO (m_ctx, "%s=%s\n", DYAD_KVS_NAMESPACE_ENV, m_ctx->kvs_namespace); } bool dyad_stream_core::is_dyad_producer () diff --git a/src/urpc/urpc_client.c b/src/urpc/urpc_client.c index 317e9b79..45d66fee 100644 --- a/src/urpc/urpc_client.c +++ b/src/urpc/urpc_client.c @@ -21,7 +21,7 @@ #include #include using namespace std; // std::clock () -//#include // c++11 +// #include // c++11 #else #include #include @@ -109,8 +109,7 @@ static int urpc_rpc_pack (flux_future_t **ft, uint32_t rank, const char *cmd) { // Send the request to execute a command - if (!(*ft = flux_rpc_pack (ctx->h, "urpc.exec", rank, 0, "{s:s}", "cmd", - cmd))) { + if (!(*ft = flux_rpc_pack (ctx->h, "urpc.exec", rank, 0, "{s:s}", "cmd", cmd))) { FLUX_LOG_ERR ("flux_rpc_pack({urpc.exec %s})", cmd); return -1; } diff --git a/src/urpc/urpc_client.h b/src/urpc/urpc_client.h index cfd9e54a..6b224819 100644 --- a/src/urpc/urpc_client.h +++ b/src/urpc/urpc_client.h @@ -26,7 +26,7 @@ int urpc_client (uint32_t server_rank, } #endif // __cplusplus -#endif /* URPC_CLIENT_H */ +#endif /* URPC_CLIENT_H */ /* * vi: ts=4 sw=4 expandtab diff --git a/src/utils/read_all.h b/src/utils/read_all.h index bd1875ec..12ab1948 100644 --- a/src/utils/read_all.h +++ b/src/utils/read_all.h @@ -22,10 +22,11 @@ ssize_t write_all (int fd, const void *buf, size_t len); #if DYAD_PERFFLOW __attribute__ ((annotate ("@critical_path()"))) #endif -ssize_t read_all (int fd, void **bufp); +ssize_t +read_all (int fd, void **bufp); #if defined(__cplusplus) }; #endif // defined(__cplusplus) -#endif /* UTILS_READ_ALL_H */ +#endif /* UTILS_READ_ALL_H */ diff --git a/src/utils/utils.c b/src/utils/utils.c index d8fd80aa..713e23f9 100644 --- a/src/utils/utils.c +++ b/src/utils/utils.c @@ -10,7 
+10,7 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE -#endif // _GNU_SOURCE +#endif // _GNU_SOURCE #include // open #include // basename dirname @@ -26,7 +26,7 @@ #include #include #include -//#include // c++11 +// #include // c++11 #else #include #include // PATH_MAX @@ -94,8 +94,7 @@ char* concat_str (char* __restrict__ str, bool con_end = false; if ((connector != NULL) && (str_len_org >= con_len)) { - con_end = - (strncmp (str + str_len_org - con_len, connector, con_len) == 0); + con_end = (strncmp (str + str_len_org - con_len, connector, con_len) == 0); } const size_t str_len = (con_end ? (str_len_org - con_len) : str_len_org); @@ -160,11 +159,9 @@ bool cmp_prefix (const char* __restrict__ prefix, { const char* const u_len_end = ((const char*)u_len) + sizeof (size_t); bool no_overlap = - ((prefix + strlen (prefix) <= (char*)u_len) - || (u_len_end <= prefix)) + ((prefix + strlen (prefix) <= (char*)u_len) || (u_len_end <= prefix)) && ((full + strlen (full) <= (char*)u_len) || (u_len_end <= full)) - && ((delim + strlen (delim) <= (char*)u_len) - || (u_len_end <= delim)); + && ((delim + strlen (delim) <= (char*)u_len) || (u_len_end <= delim)); if (!no_overlap) { DPRINTF ("DYAD UTIL: buffers overlap.\n"); @@ -232,9 +229,8 @@ bool cmp_canonical_path_prefix (const char* __restrict__ prefix, { { const char* const upath_end = upath + upath_capacity; - bool no_overlap = - ((prefix + strlen (prefix) <= upath) || (upath_end <= prefix)) - && ((path + strlen (path) <= upath) || (upath_end <= path)); + bool no_overlap = ((prefix + strlen (prefix) <= upath) || (upath_end <= prefix)) + && ((path + strlen (path) <= upath) || (upath_end <= path)); if (!no_overlap) { DPRINTF ("DYAD UTIL: buffers overlap\n"); @@ -321,10 +317,12 @@ int mkdir_as_needed (const char* path, const mode_t m) "Directory \"%s\" already exists with " "different permission bits %o from " "the requested %o\n", - path, (sb.st_mode & RWX_UGO), (m & RWX_UGO)); + path, + (sb.st_mode & RWX_UGO), + (m & RWX_UGO)); 
return 5; // already exists but with different mode } - return 1; // already exists + return 1; // already exists } IPRINTF ("Creating directory \"%s\"\n", path); @@ -339,13 +337,14 @@ int mkdir_as_needed (const char* path, const mode_t m) "Directory \"%s\" already exists with " "different permission bits %o from " "the requested %o\n", - path, (sb.st_mode & RWX_UGO), (m & RWX_UGO)); + path, + (sb.st_mode & RWX_UGO), + (m & RWX_UGO)); return 5; // already exists but with different mode } - return 1; // already exists + return 1; // already exists } - DPRINTF ("Cannot create directory \"%s\": %s\n", path, - strerror (errno)); + DPRINTF ("Cannot create directory \"%s\": %s\n", path, strerror (errno)); perror ("mkdir_as_needed() "); return -1; } @@ -353,7 +352,7 @@ int mkdir_as_needed (const char* path, const mode_t m) #if DYAD_SYNC_DIR sync_containing_dir (path); -#endif // DYAD_SYNC_DIR +#endif // DYAD_SYNC_DIR return 0; // The new directory has been succesfully created } @@ -371,11 +370,11 @@ int get_path (const int fd, const size_t max_size, char* path) ssize_t rc = readlink (proclink, path, max_size); if (rc < (ssize_t)0) { IPRINTF ("DYAD UTIL: error reading the file link (%s): %s\n", - strerror (errno), proclink); + strerror (errno), + proclink); return -1; } else if ((size_t)rc == max_size) { - IPRINTF ("DYAD UTIL: truncation might have happend with %s\n", - proclink); + IPRINTF ("DYAD UTIL: truncation might have happend with %s\n", proclink); } path[max_size + 1] = '\0'; @@ -446,7 +445,9 @@ int sync_containing_dir (const char* path) if (dir_fd < 0) { char errmsg[PATH_MAX + 256] = {'\0'}; - snprintf (errmsg, PATH_MAX + 256, "Failed to open directory %s\n", + snprintf (errmsg, + PATH_MAX + 256, + "Failed to open directory %s\n", containing_dir); perror (errmsg); return -1; // exit (SYS_ERR); @@ -454,16 +455,14 @@ int sync_containing_dir (const char* path) if (fsync (dir_fd) < 0) { char errmsg[PATH_MAX + 256] = {'\0'}; - snprintf (errmsg, PATH_MAX + 256, "Failed 
to fsync %s\n", - containing_dir); + snprintf (errmsg, PATH_MAX + 256, "Failed to fsync %s\n", containing_dir); perror (errmsg); return -1; // exit (SYS_ERR); } if (close (dir_fd) < 0) { char errmsg[PATH_MAX + 256] = {'\0'}; - snprintf (errmsg, PATH_MAX + 256, "Failed to close %s\n", - containing_dir); + snprintf (errmsg, PATH_MAX + 256, "Failed to close %s\n", containing_dir); perror (errmsg); return -1; // exit (SYS_ERR); } diff --git a/src/utils/utils.h b/src/utils/utils.h index 52173e82..734b17bd 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -18,7 +18,7 @@ #define DYAD_PATH_DELIM "/" #if defined(__cplusplus) -//#include // c++11 +// #include // c++11 #include #else #include diff --git a/src/wrapper/wrapper.c b/src/wrapper/wrapper.c index d146668c..829cfcb6 100644 --- a/src/wrapper/wrapper.c +++ b/src/wrapper/wrapper.c @@ -21,7 +21,7 @@ #include #include using namespace std; // std::clock () -//#include // c++11 +// #include // c++11 #else #include #include @@ -91,16 +91,20 @@ void dyad_wrapper_init (void) rc = dyad_init_env (&ctx); if (DYAD_IS_ERROR (rc)) { - fprintf(stderr, "Failed to initialize DYAD (code = %d)\n", rc); + fprintf (stderr, "Failed to initialize DYAD (code = %d)\n", rc); ctx->initialized = false; ctx->reenter = false; return; } DYAD_LOG_INFO (ctx, "DYAD Initialized\n"); - DYAD_LOG_INFO (ctx, "%s=%s\n", DYAD_SYNC_DEBUG_ENV, + DYAD_LOG_INFO (ctx, + "%s=%s\n", + DYAD_SYNC_DEBUG_ENV, (ctx->debug) ? "true" : "false"); - DYAD_LOG_INFO (ctx, "%s=%s\n", DYAD_SYNC_CHECK_ENV, + DYAD_LOG_INFO (ctx, + "%s=%s\n", + DYAD_SYNC_CHECK_ENV, (ctx->check) ? "true" : "false"); DYAD_LOG_INFO (ctx, "%s=%u\n", DYAD_KEY_DEPTH_ENV, ctx->key_depth); DYAD_LOG_INFO (ctx, "%s=%u\n", DYAD_KEY_BINS_ENV, ctx->key_bins); @@ -144,8 +148,7 @@ DYAD_DLL_EXPORTED int open (const char *path, int oflag, ...) 
} if (!(ctx && ctx->h) || (ctx && !ctx->reenter)) { - IPRINTF (ctx, "DYAD_SYNC: open sync not applicable for \"%s\".\n", - path); + IPRINTF (ctx, "DYAD_SYNC: open sync not applicable for \"%s\".\n", path); goto real_call; } @@ -183,7 +186,8 @@ DYAD_DLL_EXPORTED FILE *fopen (const char *path, const char *mode) } if (!(ctx && ctx->h) || (ctx && !ctx->reenter) || !path) { - IPRINTF (ctx, "DYAD_SYNC: fopen sync not applicable for \"%s\".\n", + IPRINTF (ctx, + "DYAD_SYNC: fopen sync not applicable for \"%s\".\n", ((path) ? path : "")); goto real_call; } @@ -221,13 +225,11 @@ DYAD_DLL_EXPORTED int close (int fd) if ((fd < 0) || (ctx == NULL) || (ctx->h == NULL) || !ctx->reenter) { #if defined(IPRINTF_DEFINED) if (ctx == NULL) { - IPRINTF (ctx, - "DYAD_SYNC: close sync not applicable. (no context)\n"); + IPRINTF (ctx, "DYAD_SYNC: close sync not applicable. (no context)\n"); } else if (ctx->h == NULL) { IPRINTF (ctx, "DYAD_SYNC: close sync not applicable. (no flux)\n"); } else if (!ctx->reenter) { - IPRINTF (ctx, - "DYAD_SYNC: close sync not applicable. (no reenter)\n"); + IPRINTF (ctx, "DYAD_SYNC: close sync not applicable. (no reenter)\n"); } else if (fd >= 0) { IPRINTF (ctx, "DYAD_SYNC: close sync not applicable. 
(invalid file " @@ -244,8 +246,7 @@ DYAD_DLL_EXPORTED int close (int fd) } if (get_path (fd, PATH_MAX - 1, path) < 0) { - IPRINTF (ctx, - "DYAD_SYNC: unable to obtain file path from a descriptor.\n"); + IPRINTF (ctx, "DYAD_SYNC: unable to obtain file path from a descriptor.\n"); to_sync = false; goto real_call; } @@ -266,15 +267,15 @@ real_call:; // semicolon here to avoid the error int wronly = is_wronly (fd); if (wronly == -1) { - DPRINTF (ctx, "Failed to check the mode of the file with fcntl: %s\n", + DPRINTF (ctx, + "Failed to check the mode of the file with fcntl: %s\n", strerror (errno)); } if (to_sync && wronly == 1) { rc = func_ptr (fd); if (rc != 0) { - DPRINTF (ctx, "Failed close (\"%s\").: %s\n", path, - strerror (errno)); + DPRINTF (ctx, "Failed close (\"%s\").: %s\n", path, strerror (errno)); } IPRINTF (ctx, "DYAD_SYNC: enters close sync (\"%s\").\n", path); if (DYAD_IS_ERROR (dyad_produce (ctx, path))) { @@ -311,13 +312,11 @@ DYAD_DLL_EXPORTED int fclose (FILE *fp) if ((fp == NULL) || (ctx == NULL) || (ctx->h == NULL) || !ctx->reenter) { #if defined(IPRINTF_DEFINED) if (ctx == NULL) { - IPRINTF (ctx, - "DYAD_SYNC: fclose sync not applicable. (no context)\n"); + IPRINTF (ctx, "DYAD_SYNC: fclose sync not applicable. (no context)\n"); } else if (ctx->h == NULL) { IPRINTF (ctx, "DYAD_SYNC: fclose sync not applicable. (no flux)\n"); } else if (!ctx->reenter) { - IPRINTF (ctx, - "DYAD_SYNC: fclose sync not applicable. (no reenter)\n"); + IPRINTF (ctx, "DYAD_SYNC: fclose sync not applicable. (no reenter)\n"); } else if (fp == NULL) { IPRINTF (ctx, "DYAD_SYNC: fclose sync not applicable. 
(invalid file " @@ -334,8 +333,7 @@ DYAD_DLL_EXPORTED int fclose (FILE *fp) } if (get_path (fileno (fp), PATH_MAX - 1, path) < 0) { - IPRINTF (ctx, - "DYAD_SYNC: unable to obtain file path from a descriptor.\n"); + IPRINTF (ctx, "DYAD_SYNC: unable to obtain file path from a descriptor.\n"); to_sync = false; goto real_call; } @@ -356,7 +354,8 @@ real_call:; int wronly = is_wronly (fd); if (wronly == -1) { - DPRINTF (ctx, "Failed to check the mode of the file with fcntl: %s\n", + DPRINTF (ctx, + "Failed to check the mode of the file with fcntl: %s\n", strerror (errno)); } From 19a345c0e098011969ef483744230dc9b93dff1c Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Fri, 28 Jul 2023 20:12:45 -0700 Subject: [PATCH 09/18] Adds macro to check Flux function return codes and adds more returns codes for DYAD based on DTL --- src/dtl/dyad_rc.h | 15 ++++++++++----- src/dtl/flux_dtl.c | 10 +++++----- src/dtl/ucx_dtl.c | 10 +++++----- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/dtl/dyad_rc.h b/src/dtl/dyad_rc.h index 881be69a..bd9a9887 100644 --- a/src/dtl/dyad_rc.h +++ b/src/dtl/dyad_rc.h @@ -25,19 +25,24 @@ enum dyad_core_return_codes { DYAD_RC_BADFIO = -9, // File I/O failed DYAD_RC_BADMANAGEDPATH = -10, // Cons or Prod Manged Path is bad DYAD_RC_BADDTLMODE = -11, // Invalid DYAD DTL mode provided - DYAD_RC_BADPACK = -12, // JSON RPC payload packing failed - DYAD_RC_UCXINIT_FAIL = -13, // UCX initialization failed - DYAD_RC_UCXWAIT_FAIL = -14, // UCX wait (either custom or + DYAD_RC_BADPACK = -12, // JSON packing failed + DYAD_RC_BADUNPACK = -13, // JSON unpacking failed + DYAD_RC_UCXINIT_FAIL = -14, // UCX initialization failed + DYAD_RC_UCXWAIT_FAIL = -15, // UCX wait (either custom or // 'ucp_worker_wait') failed - DYAD_RC_UCXCOMM_FAIL = -15, // UCX communication routine failed - DYAD_RC_RPC_FINISHED = -16, // The Flux RPC responded with ENODATA (i.e., + DYAD_RC_UCXCOMM_FAIL = -16, // UCX communication routine failed + DYAD_RC_RPC_FINISHED = -17, 
// The Flux RPC responded with ENODATA (i.e., // end of stream) sooner than expected + DYAD_RC_BAD_B64DECODE = -18, // Decoding of data w/ base64 failed + DYAD_RC_BAD_COMM_MODE = -19, // Invalid comm mode provided to DTL }; typedef enum dyad_core_return_codes dyad_rc_t; #define DYAD_IS_ERROR(code) ((code) < 0) +#define FLUX_IS_ERROR(code) ((code) < 0) + #ifdef __cplusplus } #endif diff --git a/src/dtl/flux_dtl.c b/src/dtl/flux_dtl.c index 39b4930d..7de97e2a 100644 --- a/src/dtl/flux_dtl.c +++ b/src/dtl/flux_dtl.c @@ -45,13 +45,13 @@ dyad_rc_t dyad_dtl_flux_rpc_unpack (dyad_dtl_t* self, const flux_msg_t* msg, char** upath) { - int errcode = 0; - errcode = flux_request_unpack (msg, NULL, "{s:s}", "upath", upath); - if (errcode < 0) { + int rc = 0; + rc = flux_request_unpack (msg, NULL, "{s:s}", "upath", upath); + if (FLUX_IS_ERROR (rc)) { FLUX_LOG_ERR (self->private.flux_dtl_handle->h, "Could not unpack Flux message from consumer\n"); // TODO create new RC for this - return -1; + return DYAD_RC_BADUNPACK; } self->private.flux_dtl_handle->msg = msg; return DYAD_RC_OK; @@ -106,7 +106,7 @@ dyad_rc_t dyad_dtl_flux_recv (dyad_dtl_t* self, void** buf, size_t* buflen) FLUX_LOG_ERR (dtl_handle->h, "Cannot get data using RPC without a Flux future\n"); // TODO create new RC for this - return -1; + return DYAD_RC_FLUXFAIL; } rc = flux_rpc_get_raw (dtl_handle->f, (const void**)buf, (int*)buflen); if (rc < 0) { diff --git a/src/dtl/ucx_dtl.c b/src/dtl/ucx_dtl.c index df691962..cace5504 100644 --- a/src/dtl/ucx_dtl.c +++ b/src/dtl/ucx_dtl.c @@ -326,7 +326,7 @@ dyad_rc_t dyad_dtl_ucx_rpc_unpack (dyad_dtl_t* self, &enc_addr_len); if (errcode < 0) { FLUX_LOG_ERR (dtl_handle->h, "Could not unpack Flux message from consumer!\n"); - return -1; + return DYAD_RC_BADUNPACK; } dtl_handle->comm_tag = ((uint64_t)tag_prod << 32) | (uint64_t)tag_cons; FLUX_LOG_INFO (dtl_handle->h, "Obtained upath from RPC payload: %s\n", upath); @@ -350,7 +350,7 @@ dyad_rc_t dyad_dtl_ucx_rpc_unpack (dyad_dtl_t* 
self, free (dtl_handle->consumer_address); dtl_handle->consumer_address = NULL; dtl_handle->addr_len = 0; - return -1; + return DYAD_RC_BAD_B64DECODE; } return DYAD_RC_OK; } @@ -387,7 +387,7 @@ dyad_rc_t dyad_dtl_ucx_establish_connection (dyad_dtl_t* self, FLUX_LOG_ERR (dtl_handle->h, "ucp_ep_create failed with status %d\n", (int)status); - return -1; + return DYAD_RC_UCXCOMM_FAIL; } if (dtl_handle->debug) { ucp_ep_print_info (dtl_handle->ep, stderr); @@ -401,7 +401,7 @@ dyad_rc_t dyad_dtl_ucx_establish_connection (dyad_dtl_t* self, } else { FLUX_LOG_ERR (dtl_handle->h, "Invalid communication mode: %d\n", comm_mode); // TODO create new RC for this - return -1; + return DYAD_RC_BAD_COMM_MODE; } } @@ -653,7 +653,7 @@ dyad_rc_t dyad_dtl_ucx_close_connection (dyad_dtl_t* self) "Somehow, an invalid comm mode reached " "'close_connection'\n"); // TODO create new RC for this case - return -1; + return DYAD_RC_BAD_COMM_MODE; } } From 22c872061d90f13bb93c8ade7f85687105cd3aba Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Sat, 29 Jul 2023 11:03:43 -0700 Subject: [PATCH 10/18] Changes default DTL mode to Flux RPC --- src/core/dyad_core.c | 4 ++-- src/dtl/dyad_dtl.h | 2 ++ src/modules/dyad.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/core/dyad_core.c b/src/core/dyad_core.c index e31f516d..cfda01a0 100644 --- a/src/core/dyad_core.c +++ b/src/core/dyad_core.c @@ -718,10 +718,10 @@ dyad_rc_t dyad_init_env (dyad_ctx_t** ctx) Defaulting to UCX\n", DYAD_DTL_MODE_ENV); } - dtl_mode = DYAD_DTL_UCX; + dtl_mode = DYAD_DTL_DEFAULT; } } else { - dtl_mode = DYAD_DTL_UCX; + dtl_mode = DYAD_DTL_DEFAULT; } if (debug) fprintf (stderr, diff --git a/src/dtl/dyad_dtl.h b/src/dtl/dyad_dtl.h index 9e1cecec..e2bbc13f 100644 --- a/src/dtl/dyad_dtl.h +++ b/src/dtl/dyad_dtl.h @@ -8,6 +8,8 @@ extern "C" { enum dyad_dtl_mode { DYAD_DTL_UCX = 0, DYAD_DTL_FLUX_RPC = 1, DYAD_DTL_END = 2 }; typedef enum dyad_dtl_mode dyad_dtl_mode_t; +static const dyad_dtl_mode_t 
DYAD_DTL_DEFAULT = DYAD_DTL_FLUX_RPC; + struct dyad_dtl; #ifdef __cplusplus diff --git a/src/modules/dyad.c b/src/modules/dyad.c index 38c25a99..3b848b2f 100644 --- a/src/modules/dyad.c +++ b/src/modules/dyad.c @@ -254,7 +254,7 @@ DYAD_DLL_EXPORTED int mod_main (flux_t *h, int argc, char **argv) const mode_t m = (S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH | S_ISGID); dyad_mod_ctx_t *ctx = NULL; size_t flag_len = 0; - dyad_dtl_mode_t dtl_mode = DYAD_DTL_UCX; + dyad_dtl_mode_t dtl_mode = DYAD_DTL_DEFAULT; bool debug = false; if (!h) { From 726138d04a65bfa3124818e61d3b59b6ea92db15 Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Sat, 29 Jul 2023 11:09:25 -0700 Subject: [PATCH 11/18] Small fix to flux_dtl --- src/dtl/flux_dtl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dtl/flux_dtl.c b/src/dtl/flux_dtl.c index 7de97e2a..7bd20f7c 100644 --- a/src/dtl/flux_dtl.c +++ b/src/dtl/flux_dtl.c @@ -1,6 +1,6 @@ #include "flux_dtl.h" -dyad_rc_t dyad_dtl_flux_init (dyad_dtl_t* dtl_handle, +dyad_rc_t dyad_dtl_flux_init (dyad_dtl_t* self, dyad_dtl_mode_t mode, flux_t* h, bool debug) From 7aecd5978e4644ea61780b4d7b7a4dff13d2ceec Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Sun, 30 Jul 2023 17:24:59 -0700 Subject: [PATCH 12/18] Logging improvements to module --- src/modules/dyad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/modules/dyad.c b/src/modules/dyad.c index 3b848b2f..20cfb115 100644 --- a/src/modules/dyad.c +++ b/src/modules/dyad.c @@ -192,7 +192,7 @@ dyad_fetch_request_cb (flux_t *h, goto fetch_error; } - FLUX_LOG_INFO (h, "Close RPC message stream with an ENODATA message"); + FLUX_LOG_INFO (h, "Close RPC message stream with an ENODATA (%d) message", ENODATA); if (flux_respond_error (h, msg, ENODATA, NULL) < 0) { FLUX_LOG_ERR (h, "DYAD_MOD: %s: flux_respond_error with ENODATA failed\n", @@ -203,7 +203,7 @@ dyad_fetch_request_cb (flux_t *h, return; fetch_error: - FLUX_LOG_INFO (h, "Close RPC message stream with an error 
(errno = %d)\n", errno); + FLUX_LOG_ERR (h, "Close RPC message stream with an error (errno = %d)\n", errno); if (flux_respond_error (h, msg, errno, NULL) < 0) { FLUX_LOG_ERR (h, "DYAD_MOD: %s: flux_respond_error", __FUNCTION__); } From ba8633a36aead327849e068d1d20350aa78192d4 Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Fri, 28 Jul 2023 11:35:59 -0700 Subject: [PATCH 13/18] Adds Caliper support to DYAD --- configure.ac | 13 +++ src/core/dyad_core.c | 154 +++++++++++++++++++++++++++++++- src/dtl/dyad_dtl_impl.c | 43 +++++++++ src/dtl/flux_dtl.c | 57 +++++++++++- src/dtl/ucx_dtl.c | 125 +++++++++++++++++++++++++- src/modules/dyad.c | 49 ++++++++++ src/stream/dyad_stream_core.cpp | 19 ++++ src/wrapper/wrapper.c | 59 +++++++++++- 8 files changed, 510 insertions(+), 9 deletions(-) diff --git a/configure.ac b/configure.ac index 371f3c95..3f6521e6 100644 --- a/configure.ac +++ b/configure.ac @@ -72,6 +72,12 @@ AC_ARG_ENABLE([perfflow], [enable_perfflow=$enableval], [enable_perfflow=no] ) +AC_ARG_ENABLE(caliper, + [AS_HELP_STRING([--enable-caliper[=OPTS]], + [Use caliper for profiling. 
[default=no] [OPTS=no/yes]])], + , + [enable_caliper="no"] +) # TODO Add support for libb64 back once base64 encoding/decoding is fully complete # AC_ARG_VAR([LIBB64_DIR], [root directory for libb64]) @@ -134,6 +140,13 @@ AX_PERFFLOW_ASPECT([PERFFLOW], if test "x$enable_perfflow" = "xyes" && test "x$pkg_check_perfflow_found" = "xno"; then AC_MSG_ERROR([requested PerfFlow Aspect support, but cannot find PerfFlow Aspect with pkg-config]) fi +if test "$enable_caliper" = "yes"; then + PKG_CHECK_MODULES([CALIPER], [caliper], [], []) + CFLAGS="${CFLAGS} ${CALIPER_CFLAGS} " + # Do not use CALIPER_LIBS, only link to libcaliper-stub + LIBS="${LIBS} ${CALIPER_LIBS} -lrt " + AC_DEFINE([HAVE_CALIPER], [1], [Define if you have libcaliper]) +fi AM_CONDITIONAL([PERFFLOW], [test "x$enable_perfflow" = "xyes"]) ########################### diff --git a/src/core/dyad_core.c b/src/core/dyad_core.c index cfda01a0..51bfbbb8 100644 --- a/src/core/dyad_core.c +++ b/src/core/dyad_core.c @@ -1,5 +1,9 @@ #include "dyad_core.h" +#if HAVE_CALIPER +#include +#endif + #include #include @@ -90,6 +94,9 @@ __attribute__ ((annotate ("@critical_path()"))) static dyad_rc_t dyad_kvs_commit static inline dyad_rc_t dyad_kvs_commit (const dyad_ctx_t* ctx, flux_kvs_txn_t* txn) #endif { +#if HAVE_CALIPER + CALI_MARK_FUNCTION +#endif flux_future_t* f = NULL; DYAD_LOG_INFO (ctx, "Committing transaction to KVS\n"); // Commit the transaction to the Flux KVS @@ -97,6 +104,9 @@ static inline dyad_rc_t dyad_kvs_commit (const dyad_ctx_t* ctx, flux_kvs_txn_t* // If the commit failed, log an error and return DYAD_BADCOMMIT if (f == NULL) { DYAD_LOG_ERR (ctx, "Could not commit transaction to Flux KVS\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_BADCOMMIT; } // If the commit is pending, wait for it to complete @@ -104,6 +114,9 @@ static inline dyad_rc_t dyad_kvs_commit (const dyad_ctx_t* ctx, flux_kvs_txn_t* // Once the commit is complete, destroy the future and transaction flux_future_destroy 
(f); f = NULL; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_OK; } @@ -116,6 +129,9 @@ static inline dyad_rc_t publish_via_flux (const dyad_ctx_t* restrict ctx, const char* restrict upath) #endif { +#if HAVE_CALIPER + CALI_MARK_FUNCTION +#endif dyad_rc_t rc = DYAD_RC_OK; flux_kvs_txn_t* txn = NULL; const size_t topic_len = PATH_MAX; @@ -154,6 +170,9 @@ publish_done:; if (txn != NULL) { flux_kvs_txn_destroy (txn); } +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return rc; } @@ -166,6 +185,9 @@ static inline dyad_rc_t dyad_commit (dyad_ctx_t* restrict ctx, const char* restrict fname) #endif { +#if HAVE_CALIPER + CALI_MARK_FUNCTION +#endif dyad_rc_t rc = DYAD_RC_OK; char upath[PATH_MAX]; memset (upath, 0, PATH_MAX); @@ -194,6 +216,9 @@ commit_done:; if (rc == DYAD_RC_OK && (ctx && ctx->check)) { setenv (DYAD_CHECK_ENV, "ok", 1); } +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return rc; } @@ -210,6 +235,9 @@ static inline dyad_rc_t dyad_kvs_lookup (const dyad_ctx_t* ctx, flux_future_t** f) #endif { +#if HAVE_CALIPER + CALI_MARK_FUNCTION +#endif dyad_rc_t rc = DYAD_RC_OK; // Lookup information about the desired file (represented by kvs_topic) // from the Flux KVS. 
If there is no information, wait for it to be @@ -221,6 +249,9 @@ static inline dyad_rc_t dyad_kvs_lookup (const dyad_ctx_t* ctx, // If the KVS lookup failed, log an error and return DYAD_BADLOOKUP if (*f == NULL) { DYAD_LOG_ERR (ctx, "KVS lookup failed!\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_BADLOOKUP; } // Extract the rank of the producer from the KVS response @@ -229,8 +260,14 @@ static inline dyad_rc_t dyad_kvs_lookup (const dyad_ctx_t* ctx, // If the extraction did not work, log an error and return DYAD_BADFETCH if (rc < 0) { DYAD_LOG_ERR (ctx, "Could not unpack owner's rank from KVS response\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_BADFETCH; } +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_OK; } @@ -245,6 +282,9 @@ static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, dyad_kvs_response_t** restrict resp) #endif { +#if HAVE_CALIPER + CALI_MARK_FUNCTION +#endif dyad_rc_t rc = DYAD_RC_OK; char upath[PATH_MAX]; uint32_t owner_rank = 0; @@ -258,6 +298,9 @@ static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, // This relative path will be stored in upath if (!cmp_canonical_path_prefix (ctx->cons_managed_path, fname, upath, PATH_MAX)) { DYAD_LOG_INFO (ctx, "%s is not in the Consumer's managed path\n", fname); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_OK; } DYAD_LOG_INFO (ctx, @@ -319,6 +362,9 @@ fetch_done:; flux_future_destroy (f); f = NULL; } +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return rc; } @@ -335,6 +381,9 @@ static inline dyad_rc_t dyad_get_data (const dyad_ctx_t* ctx, size_t* file_len) #endif { +#if HAVE_CALIPER + CALI_MARK_FUNCTION +#endif dyad_rc_t rc = DYAD_RC_OK; dyad_rc_t final_rc = DYAD_RC_OK; flux_future_t* f; @@ -412,6 +461,9 @@ get_done:; } DYAD_LOG_INFO (ctx, "Destroy the Flux future for the RPC\n"); flux_future_destroy (f); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return rc; } @@ -424,6 +476,9 
@@ static inline dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, const dyad_kvs_response_t* restrict kvs_data) #endif { +#if HAVE_CALIPER + CALI_MARK_FUNCTION +#endif dyad_rc_t rc = DYAD_RC_OK; const char* file_data = NULL; size_t file_len = 0; @@ -489,6 +544,9 @@ static inline dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, // DYAD_CHECK_ENV environment variable to "ok" if (rc == DYAD_RC_OK && (ctx && ctx->check)) setenv (DYAD_CHECK_ENV, "ok", 1); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return rc; } @@ -503,6 +561,9 @@ dyad_rc_t dyad_init (bool debug, dyad_dtl_mode_t dtl_mode, dyad_ctx_t** ctx) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION +#endif dyad_rc_t rc = DYAD_RC_OK; // If ctx is NULL, we won't be able to return a dyad_ctx_t // to the user. In that case, print an error and return @@ -512,6 +573,9 @@ dyad_rc_t dyad_init (bool debug, "'ctx' argument to dyad_init is NULL! This prevents us from " "returning " "a dyad_ctx_t object!\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_NOCTX; } // Check if the actual dyad_ctx_t object is not NULL. @@ -523,6 +587,9 @@ dyad_rc_t dyad_init (bool debug, if ((*ctx)->initialized) { // TODO Indicate already initialized DPRINTF ((*ctx), "DYAD context already initialized\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_OK; } } else { @@ -531,6 +598,9 @@ dyad_rc_t dyad_init (bool debug, *ctx = (dyad_ctx_t*)malloc (sizeof (struct dyad_ctx)); if (*ctx == NULL) { fprintf (stderr, "Could not allocate DYAD context!\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_NOCTX; } } @@ -543,6 +613,9 @@ dyad_rc_t dyad_init (bool debug, fprintf (stderr, "Warning: no managed path provided! 
DYAD will not do " "anything!\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_OK; } // Set the values in dyad_ctx_t that don't need allocation @@ -556,6 +629,9 @@ dyad_rc_t dyad_init (bool debug, (*ctx)->h = flux_open (NULL, 0); if ((*ctx)->h == NULL) { fprintf (stderr, "Could not open Flux handle!\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_FLUXFAIL; } // Get the rank of the Flux broker corresponding @@ -563,6 +639,9 @@ dyad_rc_t dyad_init (bool debug, FLUX_LOG_INFO ((*ctx)->h, "DYAD_CORE: getting Flux rank"); if (flux_get_rank ((*ctx)->h, &((*ctx)->rank)) < 0) { FLUX_LOG_ERR ((*ctx)->h, "Could not get Flux rank!\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_FLUXFAIL; } // If the namespace is provided, copy it into the dyad_ctx_t object @@ -570,6 +649,9 @@ dyad_rc_t dyad_init (bool debug, if (kvs_namespace == NULL) { FLUX_LOG_ERR ((*ctx)->h, "No KVS namespace provided!\n"); // TODO see if we want a different return val +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_NOCTX; } const size_t namespace_len = strlen (kvs_namespace); @@ -578,6 +660,9 @@ dyad_rc_t dyad_init (bool debug, FLUX_LOG_ERR ((*ctx)->h, "Could not allocate buffer for KVS namespace!\n"); free (*ctx); *ctx = NULL; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_NOCTX; } strncpy ((*ctx)->kvs_namespace, kvs_namespace, namespace_len + 1); @@ -587,6 +672,9 @@ dyad_rc_t dyad_init (bool debug, rc = dyad_dtl_init (&(*ctx)->dtl_handle, dtl_mode, (*ctx)->h, (*ctx)->debug); if (DYAD_IS_ERROR (rc)) { FLUX_LOG_ERR ((*ctx)->h, "Cannot initialize the DTL\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return rc; } // If the producer-managed path is provided, copy it into @@ -604,6 +692,9 @@ dyad_rc_t dyad_init (bool debug, free ((*ctx)->kvs_namespace); free (*ctx); *ctx = NULL; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_NOCTX; } strncpy ((*ctx)->prod_managed_path, 
prod_managed_path, prod_path_len + 1); @@ -624,6 +715,9 @@ dyad_rc_t dyad_init (bool debug, free ((*ctx)->prod_managed_path); free (*ctx); *ctx = NULL; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_NOCTX; } strncpy ((*ctx)->cons_managed_path, cons_managed_path, cons_path_len + 1); @@ -633,12 +727,18 @@ dyad_rc_t dyad_init (bool debug, (*ctx)->reenter = true; (*ctx)->initialized = true; // TODO Print logging info +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_OK; } dyad_rc_t dyad_init_env (dyad_ctx_t** ctx) { - char* e = NULL; +#if HAVE_CALIPER + CALI_MARK_FUNCTION +#endif + char *e = NULL; bool debug = false; bool check = false; bool shared_storage = false; @@ -649,6 +749,7 @@ dyad_rc_t dyad_init_env (dyad_ctx_t** ctx) char* cons_managed_path = NULL; size_t dtl_mode_env_len = 0; dyad_dtl_mode_t dtl_mode = DYAD_DTL_UCX; + dyad_rc_t rc = DYAD_RC_OK; if ((e = getenv (DYAD_SYNC_DEBUG_ENV))) { debug = true; @@ -727,7 +828,7 @@ dyad_rc_t dyad_init_env (dyad_ctx_t** ctx) fprintf (stderr, "DYAD_CORE: retrieved configuration from environment. Now " "initializing DYAD\n"); - return dyad_init (debug, + rc = dyad_init (debug, check, shared_storage, key_depth, @@ -737,36 +838,65 @@ dyad_rc_t dyad_init_env (dyad_ctx_t** ctx) cons_managed_path, dtl_mode, ctx); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif + return rc; } dyad_rc_t dyad_produce (dyad_ctx_t* ctx, const char* fname) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION + dyad_rc_t rc = DYAD_RC_OK; +#endif // If the context is not defined, then it is not valid. // So, return DYAD_NOCTX if (!ctx || !ctx->h) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_NOCTX; } // If the producer-managed path is NULL or empty, then the context is not // valid for a producer operation. 
So, return DYAD_BADMANAGEDPATH if (ctx->prod_managed_path == NULL || strlen (ctx->prod_managed_path) == 0) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_BADMANAGEDPATH; } // If the context is valid, call dyad_commit to perform // the producer operation +#if HAVE_CALIPER + rc = dyad_commit (ctx, fname); + CALI_MARK_FUNCTION_END + return rc; +#else return dyad_commit (ctx, fname); +#endif } dyad_rc_t dyad_consume (dyad_ctx_t* ctx, const char* fname) { - int rc = 0; +#if HAVE_CALIPER + CALI_MARK_FUNCTION +#endif + dyad_rc_t rc = DYAD_RC_OK; // If the context is not defined, then it is not valid. // So, return DYAD_NOCTX if (!ctx || !ctx->h) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_NOCTX; } // If the consumer-managed path is NULL or empty, then the context is not // valid for a consumer operation. So, return DYAD_BADMANAGEDPATH if (ctx->cons_managed_path == NULL || strlen (ctx->cons_managed_path) == 0) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_BADMANAGEDPATH; } // Set reenter to false to avoid recursively performing @@ -811,12 +941,21 @@ dyad_rc_t dyad_consume (dyad_ctx_t* ctx, const char* fname) consume_done:; // Set reenter to true to allow additional intercepting ctx->reenter = true; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return rc; } int dyad_finalize (dyad_ctx_t** ctx) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION +#endif if (ctx == NULL || *ctx == NULL) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_OK; } dyad_dtl_finalize (&(*ctx)->dtl_handle); @@ -838,6 +977,9 @@ int dyad_finalize (dyad_ctx_t** ctx) } free (*ctx); *ctx = NULL; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return DYAD_RC_OK; } @@ -847,6 +989,9 @@ __attribute__((annotate("@critical_path()"))) #endif int dyad_sync_directory(dyad_ctx_t* restrict ctx, const char* restrict path) { // Flush new directory entry https://lwn.net/Articles/457671/ +#if HAVE_CALIPER + CALI_MARK_FUNCTION +#endif char 
path_copy[PATH_MAX + 1]; int odir_fd = -1; char* odir = NULL; @@ -876,6 +1021,9 @@ int dyad_sync_directory(dyad_ctx_t* restrict ctx, const char* restrict path) } if (ctx != NULL) ctx->reenter = reenter; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END +#endif return rc; } #endif diff --git a/src/dtl/dyad_dtl_impl.c b/src/dtl/dyad_dtl_impl.c index dd002909..1f4a1e80 100644 --- a/src/dtl/dyad_dtl_impl.c +++ b/src/dtl/dyad_dtl_impl.c @@ -3,46 +3,80 @@ #include "flux_dtl.h" #include "ucx_dtl.h" +#if HAVE_CALIPER +#include +#endif + dyad_rc_t dyad_dtl_init (dyad_dtl_t **dtl_handle, dyad_dtl_mode_t mode, flux_t *h, bool debug) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif dyad_rc_t rc = DYAD_RC_OK; *dtl_handle = malloc (sizeof (struct dyad_dtl)); if (*dtl_handle == NULL) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION_FUNCTION_END; +#endif return DYAD_RC_SYSFAIL; } (*dtl_handle)->mode = mode; if (mode == DYAD_DTL_UCX) { rc = dyad_dtl_ucx_init (*dtl_handle, mode, h, debug); if (DYAD_IS_ERROR (rc)) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION_FUNCTION_END; +#endif return rc; } +#if HAVE_CALIPER + CALI_MARK_FUNCTION_FUNCTION_END; +#endif return DYAD_RC_OK; } else if (mode == DYAD_DTL_FLUX_RPC) { rc = dyad_dtl_flux_init (*dtl_handle, mode, h, debug); if (DYAD_IS_ERROR (rc)) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION_FUNCTION_END; +#endif return rc; } +#if HAVE_CALIPER + CALI_MARK_FUNCTION_FUNCTION_END; +#endif return DYAD_RC_OK; } +#if HAVE_CALIPER + CALI_MARK_FUNCTION_FUNCTION_END; +#endif return DYAD_RC_BADDTLMODE; } dyad_rc_t dyad_dtl_finalize (dyad_dtl_t **dtl_handle) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif dyad_rc_t rc = DYAD_RC_OK; if (dtl_handle == NULL || *dtl_handle == NULL) // We should only reach this line if the user has passed // in an already-finalized DTL handle. 
In that case, // this function should be treated as a no-op, and we // should return DYAD_RC_OK to indicate no error has occured +#if HAVE_CALIPER + CALI_MARK_FUNCTION_FUNCTION_END; +#endif return DYAD_RC_OK; if ((*dtl_handle)->mode == DYAD_DTL_UCX) { if ((*dtl_handle)->private.ucx_dtl_handle != NULL) { rc = dyad_dtl_ucx_finalize (dtl_handle); if (DYAD_IS_ERROR (rc)) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION_FUNCTION_END; +#endif return rc; } } @@ -50,13 +84,22 @@ dyad_rc_t dyad_dtl_finalize (dyad_dtl_t **dtl_handle) if ((*dtl_handle)->private.flux_dtl_handle != NULL) { rc = dyad_dtl_flux_finalize (dtl_handle); if (DYAD_IS_ERROR (rc)) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION_FUNCTION_END; +#endif return rc; } } } else { +#if HAVE_CALIPER + CALI_MARK_FUNCTION_FUNCTION_END; +#endif return DYAD_RC_BADDTLMODE; } free (*dtl_handle); *dtl_handle = NULL; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_FUNCTION_END; +#endif return DYAD_RC_OK; } diff --git a/src/dtl/flux_dtl.c b/src/dtl/flux_dtl.c index 7bd20f7c..cba4bb59 100644 --- a/src/dtl/flux_dtl.c +++ b/src/dtl/flux_dtl.c @@ -1,13 +1,23 @@ #include "flux_dtl.h" +#if HAVE_CALIPER +#include +#endif + dyad_rc_t dyad_dtl_flux_init (dyad_dtl_t* self, dyad_dtl_mode_t mode, flux_t* h, bool debug) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif self->private.flux_dtl_handle = malloc (sizeof (struct dyad_dtl_flux)); if (self->private.flux_dtl_handle == NULL) { FLUX_LOG_ERR (h, "Cannot allocate the Flux DTL handle\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_SYSFAIL; } self->private.flux_dtl_handle->h = h; @@ -24,6 +34,9 @@ dyad_rc_t dyad_dtl_flux_init (dyad_dtl_t* self, self->recv = dyad_dtl_flux_recv; self->close_connection = dyad_dtl_flux_close_connection; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_OK; } @@ -32,12 +45,21 @@ dyad_rc_t dyad_dtl_flux_rpc_pack (dyad_dtl_t* restrict self, uint32_t producer_rank, json_t** restrict packed_obj) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; 
+#endif dyad_dtl_flux_t* dtl_handle = self->private.flux_dtl_handle; *packed_obj = json_pack ("{s:s}", "upath", upath); if (*packed_obj == NULL) { FLUX_LOG_ERR (dtl_handle->h, "Could not pack upath for Flux DTL\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_BADPACK; } +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_OK; } @@ -45,15 +67,24 @@ dyad_rc_t dyad_dtl_flux_rpc_unpack (dyad_dtl_t* self, const flux_msg_t* msg, char** upath) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif int rc = 0; rc = flux_request_unpack (msg, NULL, "{s:s}", "upath", upath); if (FLUX_IS_ERROR (rc)) { FLUX_LOG_ERR (self->private.flux_dtl_handle->h, "Could not unpack Flux message from consumer\n"); // TODO create new RC for this +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_BADUNPACK; } self->private.flux_dtl_handle->msg = msg; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_OK; } @@ -76,6 +107,9 @@ dyad_rc_t dyad_dtl_flux_establish_connection (dyad_dtl_t* self, dyad_rc_t dyad_dtl_flux_send (dyad_dtl_t* self, void* buf, size_t buflen) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif int errcode = 0; FLUX_LOG_INFO (self->private.flux_dtl_handle->h, "Send data to consumer using a Flux RPC response"); @@ -87,17 +121,26 @@ dyad_rc_t dyad_dtl_flux_send (dyad_dtl_t* self, void* buf, size_t buflen) FLUX_LOG_ERR (self->private.flux_dtl_handle->h, "Could not send Flux RPC response containing file " "contents\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_FLUXFAIL; } if (self->private.flux_dtl_handle->debug) { FLUX_LOG_INFO (self->private.flux_dtl_handle->h, "Successfully sent file contents to consumer\n"); } +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_OK; } dyad_rc_t dyad_dtl_flux_recv (dyad_dtl_t* self, void** buf, size_t* buflen) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif int rc = 0; errno = 0; dyad_dtl_flux_t* dtl_handle = self->private.flux_dtl_handle; @@ 
-106,15 +149,27 @@ dyad_rc_t dyad_dtl_flux_recv (dyad_dtl_t* self, void** buf, size_t* buflen) FLUX_LOG_ERR (dtl_handle->h, "Cannot get data using RPC without a Flux future\n"); // TODO create new RC for this +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_FLUXFAIL; } rc = flux_rpc_get_raw (dtl_handle->f, (const void**)buf, (int*)buflen); if (rc < 0) { FLUX_LOG_ERR (dtl_handle->h, "Could not get file data from Flux RPC\n"); if (errno == ENODATA) +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_RPC_FINISHED; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_BADRPC; } +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_OK; } @@ -137,4 +192,4 @@ dyad_rc_t dyad_dtl_flux_finalize (dyad_dtl_t** self) free ((*self)->private.flux_dtl_handle); (*self)->private.flux_dtl_handle = NULL; return DYAD_RC_OK; -} \ No newline at end of file +} diff --git a/src/dtl/ucx_dtl.c b/src/dtl/ucx_dtl.c index cace5504..3c036e7c 100644 --- a/src/dtl/ucx_dtl.c +++ b/src/dtl/ucx_dtl.c @@ -6,6 +6,9 @@ #include #include "base64.h" +#if HAVE_CALIPER +#include +#endif extern const base64_maps_t base64_maps_rfc4648; @@ -70,6 +73,9 @@ void dyad_ucx_ep_err_handler (void* arg, ucp_ep_h ep, ucs_status_t status) static ucs_status_t dyad_ucx_request_wait (dyad_dtl_ucx_t* dtl_handle, dyad_ucx_request_t* request) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif ucs_status_t final_request_status = UCS_OK; // If 'request' is actually a request handle, this means the communication // operation is scheduled, but not yet completed. 
@@ -87,30 +93,39 @@ static ucs_status_t dyad_ucx_request_wait (dyad_dtl_ucx_t* dtl_handle, final_request_status = ucp_request_check_status (request); } while (final_request_status == UCS_INPROGRESS); // Free and deallocate the request object +<<<<<<< HEAD ucp_request_free (request); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return final_request_status; } // If 'request' is actually a UCX error, this means the communication // operation immediately failed. In that case, we simply grab the // 'ucs_status_t' object for the error. else if (UCS_PTR_IS_ERR (request)) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return UCS_PTR_STATUS (request); } // If 'request' is neither a request handle nor an error, then // the communication operation immediately completed successfully. // So, we simply set the status to UCS_OK +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return UCS_OK; } -static inline dyad_rc_t dyad_dtl_ucx_finalize_impl (dyad_dtl_ucx_t** dtl_handle) -{ -} - dyad_rc_t dyad_dtl_ucx_init (dyad_dtl_t* self, dyad_dtl_mode_t mode, flux_t* h, bool debug) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif ucp_params_t ucx_params; ucp_worker_params_t worker_params; ucp_config_t* config; @@ -121,6 +136,9 @@ dyad_rc_t dyad_dtl_ucx_init (dyad_dtl_t* self, self->private.ucx_dtl_handle = malloc (sizeof (struct dyad_dtl_ucx)); if (self->private.ucx_dtl_handle == NULL) { FLUX_LOG_ERR (h, "Could not allocate UCX DTL context\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_SYSFAIL; } dtl_handle = self->private.ucx_dtl_handle; @@ -220,12 +238,18 @@ dyad_rc_t dyad_dtl_ucx_init (dyad_dtl_t* self, self->recv = dyad_dtl_ucx_recv; self->close_connection = dyad_dtl_ucx_close_connection; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_OK; error:; // If an error occured, finalize the DTL handle and // return a failing error code dyad_dtl_ucx_finalize (&self); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return 
DYAD_RC_UCXINIT_FAIL; } @@ -234,12 +258,18 @@ dyad_rc_t dyad_dtl_ucx_rpc_pack (dyad_dtl_t* restrict self, uint32_t producer_rank, json_t** restrict packed_obj) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif size_t enc_len = 0; char* enc_buf = NULL; ssize_t enc_size = 0; dyad_dtl_ucx_t* dtl_handle = self->private.ucx_dtl_handle; if (dtl_handle->consumer_address == NULL) { // TODO log error +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_BADPACK; } FLUX_LOG_INFO (dtl_handle->h, "Encode UCP address using base64\n"); @@ -249,6 +279,9 @@ dyad_rc_t dyad_dtl_ucx_rpc_pack (dyad_dtl_t* restrict self, enc_buf = malloc (enc_len + 1); if (enc_buf == NULL) { FLUX_LOG_ERR (dtl_handle->h, "Could not allocate buffer for packed address\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_SYSFAIL; } // consumer_address is casted to const char* to avoid warnings @@ -262,6 +295,9 @@ dyad_rc_t dyad_dtl_ucx_rpc_pack (dyad_dtl_t* restrict self, if (enc_size < 0) { // TODO log error free (enc_buf); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_BADPACK; } FLUX_LOG_INFO (dtl_handle->h, "Creating UCP tag for tag matching\n"); @@ -272,6 +308,9 @@ dyad_rc_t dyad_dtl_ucx_rpc_pack (dyad_dtl_t* restrict self, uint32_t consumer_rank = 0; if (flux_get_rank (dtl_handle->h, &consumer_rank) < 0) { FLUX_LOG_ERR (dtl_handle->h, "Cannot get consumer rank\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_FLUXFAIL; } // The tag is a 64 bit unsigned integer consisting of the @@ -295,8 +334,14 @@ dyad_rc_t dyad_dtl_ucx_rpc_pack (dyad_dtl_t* restrict self, // If the packing failed, log an error if (*packed_obj == NULL) { FLUX_LOG_ERR (dtl_handle->h, "Could not pack upath and UCX address for RPC\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_BADPACK; } +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_OK; } @@ -304,6 +349,9 @@ dyad_rc_t dyad_dtl_ucx_rpc_unpack (dyad_dtl_t* self, const 
flux_msg_t* msg, char** upath) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif char* enc_addr = NULL; size_t enc_addr_len = 0; int errcode = 0; @@ -326,6 +374,9 @@ dyad_rc_t dyad_dtl_ucx_rpc_unpack (dyad_dtl_t* self, &enc_addr_len); if (errcode < 0) { FLUX_LOG_ERR (dtl_handle->h, "Could not unpack Flux message from consumer!\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_BADUNPACK; } dtl_handle->comm_tag = ((uint64_t)tag_prod << 32) | (uint64_t)tag_cons; @@ -338,6 +389,9 @@ dyad_rc_t dyad_dtl_ucx_rpc_unpack (dyad_dtl_t* self, dtl_handle->consumer_address = (ucp_address_t*)malloc (dtl_handle->addr_len); if (dtl_handle->consumer_address == NULL) { FLUX_LOG_ERR (dtl_handle->h, "Could not allocate memory for consumer address"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_SYSFAIL; } decoded_len = base64_decode_using_maps (&base64_maps_rfc4648, @@ -350,6 +404,9 @@ dyad_rc_t dyad_dtl_ucx_rpc_unpack (dyad_dtl_t* self, free (dtl_handle->consumer_address); dtl_handle->consumer_address = NULL; dtl_handle->addr_len = 0; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_BAD_B64DECODE; } return DYAD_RC_OK; @@ -368,6 +425,9 @@ dyad_rc_t dyad_dtl_ucx_rpc_recv_response (dyad_dtl_t* self, flux_future_t* f) dyad_rc_t dyad_dtl_ucx_establish_connection (dyad_dtl_t* self, dyad_dtl_comm_mode_t comm_mode) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif ucp_ep_params_t params; ucs_status_t status = UCS_OK; dyad_dtl_ucx_t* dtl_handle = self->private.ucx_dtl_handle; @@ -387,26 +447,41 @@ dyad_rc_t dyad_dtl_ucx_establish_connection (dyad_dtl_t* self, FLUX_LOG_ERR (dtl_handle->h, "ucp_ep_create failed with status %d\n", (int)status); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_UCXCOMM_FAIL; } if (dtl_handle->debug) { ucp_ep_print_info (dtl_handle->ep, stderr); } +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_OK; } else if (comm_mode == DYAD_COMM_RECV) { FLUX_LOG_INFO (dtl_handle->h, 
"No explicit connection establishment needed for UCX " "receiver\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_OK; } else { FLUX_LOG_ERR (dtl_handle->h, "Invalid communication mode: %d\n", comm_mode); // TODO create new RC for this +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_BAD_COMM_MODE; } } dyad_rc_t dyad_dtl_ucx_send (dyad_dtl_t* self, void* buf, size_t buflen) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif ucs_status_ptr_t stat_ptr; ucs_status_t status = UCS_OK; dyad_ucx_request_t* req = NULL; @@ -415,6 +490,9 @@ dyad_rc_t dyad_dtl_ucx_send (dyad_dtl_t* self, void* buf, size_t buflen) FLUX_LOG_INFO (dtl_handle->h, "UCP endpoint was not created prior to invoking " "send!\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_UCXCOMM_FAIL; } // ucp_tag_send_sync_nbx is the prefered version of this send since UCX 1.9 @@ -447,14 +525,23 @@ dyad_rc_t dyad_dtl_ucx_send (dyad_dtl_t* self, void* buf, size_t buflen) FLUX_LOG_ERR (dtl_handle->h, "UCP Tag Send failed (status = %d)!\n", (int)status); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_UCXCOMM_FAIL; } FLUX_LOG_INFO (dtl_handle->h, "Data send with UCP succeeded\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_OK; } dyad_rc_t dyad_dtl_ucx_recv (dyad_dtl_t* self, void** buf, size_t* buflen) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif ucs_status_t status = UCS_OK; ucp_tag_message_h msg = NULL; ucp_tag_recv_info_t msg_info; @@ -530,6 +617,9 @@ dyad_rc_t dyad_dtl_ucx_recv (dyad_dtl_t* self, void** buf, size_t* buflen) // If allocation fails, log an error if (*buf == NULL) { FLUX_LOG_ERR (dtl_handle->h, "Could not allocate memory for file\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_SYSFAIL; } FLUX_LOG_INFO (dtl_handle->h, "Receive data using async UCX operation\n"); @@ -569,15 +659,24 @@ dyad_rc_t dyad_dtl_ucx_recv (dyad_dtl_t* self, void** buf, size_t* buflen) FLUX_LOG_ERR 
(dtl_handle->h, "UCX recv failed!\n"); free (*buf); *buf = NULL; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_UCXCOMM_FAIL; } FLUX_LOG_INFO (dtl_handle->h, "Data receive using UCX is successful\n"); FLUX_LOG_INFO (dtl_handle->h, "Received %lu bytes from producer\n", *buflen); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_OK; } dyad_rc_t dyad_dtl_ucx_close_connection (dyad_dtl_t* self) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif ucs_status_t status = UCS_OK; ucs_status_ptr_t stat_ptr; dyad_dtl_ucx_t* dtl_handle = self->private.ucx_dtl_handle; @@ -639,6 +738,9 @@ dyad_rc_t dyad_dtl_ucx_close_connection (dyad_dtl_t* self) dtl_handle->comm_tag = 0; } FLUX_LOG_INFO (dtl_handle->h, "UCP endpoint close successful\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_OK; } else if (dtl_handle->curr_comm_mode == DYAD_COMM_RECV) { // Since we're using tag send/recv, there's no need @@ -647,21 +749,33 @@ dyad_rc_t dyad_dtl_ucx_close_connection (dyad_dtl_t* self) // be valid for DYAD because DYAD won't send a file from // one node to the same node). 
dtl_handle->comm_tag = 0; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_OK; } else { FLUX_LOG_ERR (dtl_handle->h, "Somehow, an invalid comm mode reached " "'close_connection'\n"); // TODO create new RC for this case +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_BAD_COMM_MODE; } } dyad_rc_t dyad_dtl_ucx_finalize (dyad_dtl_t** self) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif dyad_dtl_ucx_t* dtl_handle = NULL; dyad_rc_t rc = DYAD_RC_OK; if (self == NULL || *self == NULL || (*self)->private.ucx_dtl_handle == NULL) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_OK; } dtl_handle = (*self)->private.ucx_dtl_handle; @@ -692,5 +806,8 @@ dyad_rc_t dyad_dtl_ucx_finalize (dyad_dtl_t** self) // Free the handle and set to NULL to prevent double free free (dtl_handle); (*self)->private.ucx_dtl_handle = NULL; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return DYAD_RC_OK; } diff --git a/src/modules/dyad.c b/src/modules/dyad.c index 20cfb115..94154b01 100644 --- a/src/modules/dyad.c +++ b/src/modules/dyad.c @@ -35,6 +35,10 @@ #include "read_all.h" #include "utils.h" +#if HAVE_CALIPER +#include +#endif + #define TIME_DIFF(Tstart, Tend) \ ((double)(1000000000L * ((Tend).tv_sec - (Tstart).tv_sec) + (Tend).tv_nsec \ - (Tstart).tv_nsec) \ @@ -56,14 +60,23 @@ static void dyad_mod_fini (void) __attribute__ ((destructor)); void dyad_mod_fini (void) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif flux_t *h = flux_open (NULL, 0); if (h != NULL) { } +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif } static void freectx (void *arg) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif dyad_mod_ctx_t *ctx = (dyad_mod_ctx_t *)arg; flux_msg_handler_delvec (ctx->handlers); if (ctx->dtl_handle != NULL) { @@ -71,10 +84,16 @@ static void freectx (void *arg) ctx->dtl_handle = NULL; } free (ctx); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif } static dyad_mod_ctx_t *getctx (flux_t *h) { +#if HAVE_CALIPER + 
CALI_MARK_FUNCTION; +#endif dyad_mod_ctx_t *ctx = (dyad_mod_ctx_t *)flux_aux_get (h, "dyad"); if (!ctx) { @@ -97,9 +116,15 @@ static dyad_mod_ctx_t *getctx (flux_t *h) goto getctx_done; getctx_error:; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return NULL; getctx_done: +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return ctx; } @@ -113,6 +138,9 @@ dyad_fetch_request_cb (flux_t *h, const flux_msg_t *msg, void *arg) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif FLUX_LOG_INFO (h, "Launched callback for dyad.fetch\n"); dyad_mod_ctx_t *ctx = getctx (h); ssize_t inlen = 0; @@ -200,6 +228,9 @@ dyad_fetch_request_cb (flux_t *h, } errno = saved_errno; FLUX_LOG_INFO (h, "Finished dyad.fetch module invocation\n"); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return; fetch_error: @@ -208,11 +239,17 @@ dyad_fetch_request_cb (flux_t *h, FLUX_LOG_ERR (h, "DYAD_MOD: %s: flux_respond_error", __FUNCTION__); } errno = saved_errno; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return; } static dyad_rc_t dyad_open (flux_t *h, dyad_dtl_mode_t dtl_mode, bool debug) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif dyad_mod_ctx_t *ctx = getctx (h); dyad_rc_t rc = 0; char *e = NULL; @@ -220,6 +257,9 @@ static dyad_rc_t dyad_open (flux_t *h, dyad_dtl_mode_t dtl_mode, bool debug) ctx->debug = debug; rc = dyad_dtl_init (&(ctx->dtl_handle), dtl_mode, h, ctx->debug); +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return rc; } @@ -251,6 +291,9 @@ void usage () DYAD_DLL_EXPORTED int mod_main (flux_t *h, int argc, char **argv) { +#if HAVE_CALIPER + CALI_MARK_FUNCTION; +#endif const mode_t m = (S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH | S_ISGID); dyad_mod_ctx_t *ctx = NULL; size_t flag_len = 0; @@ -318,9 +361,15 @@ DYAD_DLL_EXPORTED int mod_main (flux_t *h, int argc, char **argv) goto mod_done; mod_error:; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return EXIT_FAILURE; mod_done:; +#if HAVE_CALIPER + CALI_MARK_FUNCTION_END; +#endif return EXIT_SUCCESS; } 
diff --git a/src/stream/dyad_stream_core.cpp b/src/stream/dyad_stream_core.cpp index 9ccabce9..60e5e620 100644 --- a/src/stream/dyad_stream_core.cpp +++ b/src/stream/dyad_stream_core.cpp @@ -34,6 +34,10 @@ using namespace std; // std::clock () #include // dirname #include +#if HAVE_CALIPER +#include +#endif + namespace dyad { /***************************************************************************** @@ -54,6 +58,9 @@ dyad_stream_core::~dyad_stream_core () void dyad_stream_core::finalize () { +#if HAVE_CALIPER + CALI_CXX_MARK_FUNCTION; +#endif if (m_ctx != NULL) { dyad_finalize (&m_ctx); m_ctx = NULL; @@ -63,6 +70,9 @@ void dyad_stream_core::finalize () void dyad_stream_core::init () { +#if HAVE_CALIPER + CALI_CXX_MARK_FUNCTION; +#endif char *e = NULL; if (m_initialized) { @@ -89,6 +99,9 @@ void dyad_stream_core::init () void dyad_stream_core::init (const dyad_params &p) { +#if HAVE_CALIPER + CALI_CXX_MARK_FUNCTION; +#endif DPRINTF (m_ctx, "DYAD_WRAPPER: Initializeing DYAD wrapper\n"); dyad_rc_t rc = dyad_init (p.m_debug, false, @@ -135,6 +148,9 @@ bool dyad_stream_core::is_dyad_consumer () bool dyad_stream_core::open_sync (const char *path) { +#if HAVE_CALIPER + CALI_CXX_MARK_FUNCTION; +#endif IPRINTF (m_ctx, "DYAD_SYNC OPEN: enters sync (\"%s\").\n", path); if (!m_initialized) { // TODO log @@ -158,6 +174,9 @@ bool dyad_stream_core::open_sync (const char *path) bool dyad_stream_core::close_sync (const char *path) { +#if HAVE_CALIPER + CALI_CXX_MARK_FUNCTION; +#endif IPRINTF (m_ctx, "DYAD_SYNC CLOSE: enters sync (\"%s\").\n", path); if (!m_initialized) { // TODO log diff --git a/src/wrapper/wrapper.c b/src/wrapper/wrapper.c index 829cfcb6..bc96b8a4 100644 --- a/src/wrapper/wrapper.c +++ b/src/wrapper/wrapper.c @@ -38,6 +38,10 @@ using namespace std; // std::clock () #include // dirname #include +#if HAVE_CALIPER +#include +#endif + #include "utils.h" // #include "wrapper.h" #include "dyad_core.h" @@ -86,6 +90,9 @@ static inline int is_wronly (int fd) void 
dyad_wrapper_init (void) { +#if HAVE_CALIPER + CALI_MARK_BEGIN("dyad_wrapper_init"); +#endif dyad_rc_t rc = DYAD_RC_OK; rc = dyad_init_env (&ctx); @@ -94,6 +101,9 @@ void dyad_wrapper_init (void) fprintf (stderr, "Failed to initialize DYAD (code = %d)\n", rc); ctx->initialized = false; ctx->reenter = false; +#if HAVE_CALIPER + CALI_MARK_END("dyad_wrapper_init"); +#endif return; } @@ -108,18 +118,33 @@ void dyad_wrapper_init (void) (ctx->check) ? "true" : "false"); DYAD_LOG_INFO (ctx, "%s=%u\n", DYAD_KEY_DEPTH_ENV, ctx->key_depth); DYAD_LOG_INFO (ctx, "%s=%u\n", DYAD_KEY_BINS_ENV, ctx->key_bins); +#if HAVE_CALIPER + CALI_MARK_END("dyad_wrapper_init"); +#endif } void dyad_wrapper_fini () { +#if HAVE_CALIPER + CALI_MARK_BEGIN("dyad_wrapper_fini"); +#endif if (ctx == NULL) { +#if HAVE_CALIPER + CALI_MARK_END("dyad_wrapper_fini"); +#endif return; } dyad_finalize (&ctx); +#if HAVE_CALIPER + CALI_MARK_END("dyad_wrapper_fini"); +#endif } DYAD_DLL_EXPORTED int open (const char *path, int oflag, ...) { +#if HAVE_CALIPER + CALI_MARK_BEGIN("dyad_open_wrapper"); +#endif char *error = NULL; typedef int (*open_ptr_t) (const char *, int, mode_t, ...); open_ptr_t func_ptr = NULL; @@ -139,6 +164,9 @@ DYAD_DLL_EXPORTED int open (const char *path, int oflag, ...) func_ptr = (open_ptr_t)dlsym (RTLD_NEXT, "open"); if ((error = dlerror ())) { DPRINTF (ctx, "DYAD_SYNC: error in dlsym: %s\n", error); +#if HAVE_CALIPER + CALI_MARK_END("dyad_open_wrapper"); +#endif return -1; } @@ -160,12 +188,17 @@ DYAD_DLL_EXPORTED int open (const char *path, int oflag, ...) 
IPRINTF (ctx, "DYAD_SYNC: exists open sync (\"%s\").\n", path); real_call:; - +#if HAVE_CALIPER + CALI_MARK_END("dyad_open_wrapper"); +#endif return (func_ptr (path, oflag, mode)); } DYAD_DLL_EXPORTED FILE *fopen (const char *path, const char *mode) { +#if HAVE_CALIPER + CALI_MARK_BEGIN("dyad_fopen_wrapper"); +#endif char *error = NULL; typedef FILE *(*fopen_ptr_t) (const char *, const char *); fopen_ptr_t func_ptr = NULL; @@ -177,6 +210,9 @@ DYAD_DLL_EXPORTED FILE *fopen (const char *path, const char *mode) func_ptr = (fopen_ptr_t)dlsym (RTLD_NEXT, "fopen"); if ((error = dlerror ())) { DPRINTF (ctx, "DYAD_SYNC: error in dlsym: %s\n", error); +#if HAVE_CALIPER + CALI_MARK_END("dyad_fopen_wrapper"); +#endif return NULL; } @@ -200,11 +236,17 @@ DYAD_DLL_EXPORTED FILE *fopen (const char *path, const char *mode) IPRINTF (ctx, "DYAD_SYNC: exits fopen sync (\"%s\").\n", path); real_call: +#if HAVE_CALIPER + CALI_MARK_END("dyad_fopen_wrapper"); +#endif return (func_ptr (path, mode)); } DYAD_DLL_EXPORTED int close (int fd) { +#if HAVE_CALIPER + CALI_MARK_BEGIN("dyad_close_wrapper"); +#endif bool to_sync = false; char *error = NULL; typedef int (*close_ptr_t) (int); @@ -219,6 +261,9 @@ DYAD_DLL_EXPORTED int close (int fd) func_ptr = (close_ptr_t)dlsym (RTLD_NEXT, "close"); if ((error = dlerror ())) { DPRINTF (ctx, "DYAD_SYNC: error in dlsym: %s\n", error); +#if HAVE_CALIPER + CALI_MARK_END("dyad_close_wrapper"); +#endif return -1; // return the failure code } @@ -286,11 +331,17 @@ real_call:; // semicolon here to avoid the error rc = func_ptr (fd); } +#if HAVE_CALIPER + CALI_MARK_END("dyad_close_wrapper"); +#endif return rc; } DYAD_DLL_EXPORTED int fclose (FILE *fp) { +#if HAVE_CALIPER + CALI_MARK_BEGIN("dyad_fclose_wrapper"); +#endif bool to_sync = false; char *error = NULL; typedef int (*fclose_ptr_t) (FILE *); @@ -306,6 +357,9 @@ DYAD_DLL_EXPORTED int fclose (FILE *fp) func_ptr = (fclose_ptr_t)dlsym (RTLD_NEXT, "fclose"); if ((error = dlerror ())) { DPRINTF (ctx, 
"DYAD_SYNC: error in dlsym: %s\n", error); +#if HAVE_CALIPER + CALI_MARK_END("dyad_fclose_wrapper"); +#endif return EOF; // return the failure code } @@ -373,6 +427,9 @@ real_call:; rc = func_ptr (fp); } +#if HAVE_CALIPER + CALI_MARK_END("dyad_fclose_wrapper"); +#endif return rc; } From 2146086711250a2af77bf85d790932885b954d18 Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Fri, 28 Jul 2023 14:02:58 -0700 Subject: [PATCH 14/18] Fixes function start annotations --- src/core/dyad_core.c | 92 ++++++++++++++++++++--------------------- src/dtl/dyad_dtl_impl.c | 26 ++++++------ src/dtl/flux_dtl.c | 10 ++--- src/dtl/ucx_dtl.c | 19 ++++----- src/modules/dyad.c | 12 +++--- 5 files changed, 79 insertions(+), 80 deletions(-) diff --git a/src/core/dyad_core.c b/src/core/dyad_core.c index 51bfbbb8..34ccb93d 100644 --- a/src/core/dyad_core.c +++ b/src/core/dyad_core.c @@ -95,7 +95,7 @@ static inline dyad_rc_t dyad_kvs_commit (const dyad_ctx_t* ctx, flux_kvs_txn_t* #endif { #if HAVE_CALIPER - CALI_MARK_FUNCTION + CALI_MARK_FUNCTION_BEGIN; #endif flux_future_t* f = NULL; DYAD_LOG_INFO (ctx, "Committing transaction to KVS\n"); @@ -105,7 +105,7 @@ static inline dyad_rc_t dyad_kvs_commit (const dyad_ctx_t* ctx, flux_kvs_txn_t* if (f == NULL) { DYAD_LOG_ERR (ctx, "Could not commit transaction to Flux KVS\n"); #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_BADCOMMIT; } @@ -115,7 +115,7 @@ static inline dyad_rc_t dyad_kvs_commit (const dyad_ctx_t* ctx, flux_kvs_txn_t* flux_future_destroy (f); f = NULL; #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_OK; } @@ -130,7 +130,7 @@ static inline dyad_rc_t publish_via_flux (const dyad_ctx_t* restrict ctx, #endif { #if HAVE_CALIPER - CALI_MARK_FUNCTION + CALI_MARK_FUNCTION_BEGIN; #endif dyad_rc_t rc = DYAD_RC_OK; flux_kvs_txn_t* txn = NULL; @@ -171,7 +171,7 @@ publish_done:; flux_kvs_txn_destroy (txn); } #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + 
CALI_MARK_FUNCTION_END; #endif return rc; } @@ -186,7 +186,7 @@ static inline dyad_rc_t dyad_commit (dyad_ctx_t* restrict ctx, #endif { #if HAVE_CALIPER - CALI_MARK_FUNCTION + CALI_MARK_FUNCTION_BEGIN; #endif dyad_rc_t rc = DYAD_RC_OK; char upath[PATH_MAX]; @@ -217,7 +217,7 @@ commit_done:; setenv (DYAD_CHECK_ENV, "ok", 1); } #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return rc; } @@ -236,7 +236,7 @@ static inline dyad_rc_t dyad_kvs_lookup (const dyad_ctx_t* ctx, #endif { #if HAVE_CALIPER - CALI_MARK_FUNCTION + CALI_MARK_FUNCTION_BEGIN; #endif dyad_rc_t rc = DYAD_RC_OK; // Lookup information about the desired file (represented by kvs_topic) @@ -250,7 +250,7 @@ static inline dyad_rc_t dyad_kvs_lookup (const dyad_ctx_t* ctx, if (*f == NULL) { DYAD_LOG_ERR (ctx, "KVS lookup failed!\n"); #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_BADLOOKUP; } @@ -261,12 +261,12 @@ static inline dyad_rc_t dyad_kvs_lookup (const dyad_ctx_t* ctx, if (rc < 0) { DYAD_LOG_ERR (ctx, "Could not unpack owner's rank from KVS response\n"); #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_BADFETCH; } #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_OK; } @@ -283,7 +283,7 @@ static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, #endif { #if HAVE_CALIPER - CALI_MARK_FUNCTION + CALI_MARK_FUNCTION_BEGIN; #endif dyad_rc_t rc = DYAD_RC_OK; char upath[PATH_MAX]; @@ -299,7 +299,7 @@ static inline dyad_rc_t dyad_fetch (const dyad_ctx_t* restrict ctx, if (!cmp_canonical_path_prefix (ctx->cons_managed_path, fname, upath, PATH_MAX)) { DYAD_LOG_INFO (ctx, "%s is not in the Consumer's managed path\n", fname); #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_OK; } @@ -363,7 +363,7 @@ fetch_done:; f = NULL; } #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return rc; } @@ 
-382,7 +382,7 @@ static inline dyad_rc_t dyad_get_data (const dyad_ctx_t* ctx, #endif { #if HAVE_CALIPER - CALI_MARK_FUNCTION + CALI_MARK_FUNCTION_BEGIN; #endif dyad_rc_t rc = DYAD_RC_OK; dyad_rc_t final_rc = DYAD_RC_OK; @@ -462,7 +462,7 @@ get_done:; DYAD_LOG_INFO (ctx, "Destroy the Flux future for the RPC\n"); flux_future_destroy (f); #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return rc; } @@ -477,7 +477,7 @@ static inline dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, #endif { #if HAVE_CALIPER - CALI_MARK_FUNCTION + CALI_MARK_FUNCTION_BEGIN; #endif dyad_rc_t rc = DYAD_RC_OK; const char* file_data = NULL; @@ -545,7 +545,7 @@ static inline dyad_rc_t dyad_pull (const dyad_ctx_t* restrict ctx, if (rc == DYAD_RC_OK && (ctx && ctx->check)) setenv (DYAD_CHECK_ENV, "ok", 1); #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return rc; } @@ -562,7 +562,7 @@ dyad_rc_t dyad_init (bool debug, dyad_ctx_t** ctx) { #if HAVE_CALIPER - CALI_MARK_FUNCTION + CALI_MARK_FUNCTION_BEGIN; #endif dyad_rc_t rc = DYAD_RC_OK; // If ctx is NULL, we won't be able to return a dyad_ctx_t @@ -574,7 +574,7 @@ dyad_rc_t dyad_init (bool debug, "returning " "a dyad_ctx_t object!\n"); #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_NOCTX; } @@ -588,7 +588,7 @@ dyad_rc_t dyad_init (bool debug, // TODO Indicate already initialized DPRINTF ((*ctx), "DYAD context already initialized\n"); #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_OK; } @@ -599,7 +599,7 @@ dyad_rc_t dyad_init (bool debug, if (*ctx == NULL) { fprintf (stderr, "Could not allocate DYAD context!\n"); #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_NOCTX; } @@ -614,7 +614,7 @@ dyad_rc_t dyad_init (bool debug, "Warning: no managed path provided! 
DYAD will not do " "anything!\n"); #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_OK; } @@ -630,7 +630,7 @@ dyad_rc_t dyad_init (bool debug, if ((*ctx)->h == NULL) { fprintf (stderr, "Could not open Flux handle!\n"); #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_FLUXFAIL; } @@ -640,7 +640,7 @@ dyad_rc_t dyad_init (bool debug, if (flux_get_rank ((*ctx)->h, &((*ctx)->rank)) < 0) { FLUX_LOG_ERR ((*ctx)->h, "Could not get Flux rank!\n"); #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_FLUXFAIL; } @@ -650,7 +650,7 @@ dyad_rc_t dyad_init (bool debug, FLUX_LOG_ERR ((*ctx)->h, "No KVS namespace provided!\n"); // TODO see if we want a different return val #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_NOCTX; } @@ -661,7 +661,7 @@ dyad_rc_t dyad_init (bool debug, free (*ctx); *ctx = NULL; #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_NOCTX; } @@ -673,7 +673,7 @@ dyad_rc_t dyad_init (bool debug, if (DYAD_IS_ERROR (rc)) { FLUX_LOG_ERR ((*ctx)->h, "Cannot initialize the DTL\n"); #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return rc; } @@ -693,7 +693,7 @@ dyad_rc_t dyad_init (bool debug, free (*ctx); *ctx = NULL; #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_NOCTX; } @@ -716,7 +716,7 @@ dyad_rc_t dyad_init (bool debug, free (*ctx); *ctx = NULL; #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_NOCTX; } @@ -728,7 +728,7 @@ dyad_rc_t dyad_init (bool debug, (*ctx)->initialized = true; // TODO Print logging info #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_OK; } @@ -736,7 +736,7 @@ dyad_rc_t dyad_init (bool debug, dyad_rc_t dyad_init_env (dyad_ctx_t** ctx) { #if HAVE_CALIPER - CALI_MARK_FUNCTION + 
CALI_MARK_FUNCTION_BEGIN; #endif char *e = NULL; bool debug = false; @@ -839,7 +839,7 @@ dyad_rc_t dyad_init_env (dyad_ctx_t** ctx) dtl_mode, ctx); #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return rc; } @@ -847,14 +847,14 @@ dyad_rc_t dyad_init_env (dyad_ctx_t** ctx) dyad_rc_t dyad_produce (dyad_ctx_t* ctx, const char* fname) { #if HAVE_CALIPER - CALI_MARK_FUNCTION + CALI_MARK_FUNCTION_BEGIN; dyad_rc_t rc = DYAD_RC_OK; #endif // If the context is not defined, then it is not valid. // So, return DYAD_NOCTX if (!ctx || !ctx->h) { #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_NOCTX; } @@ -862,7 +862,7 @@ dyad_rc_t dyad_produce (dyad_ctx_t* ctx, const char* fname) // valid for a producer operation. So, return DYAD_BADMANAGEDPATH if (ctx->prod_managed_path == NULL || strlen (ctx->prod_managed_path) == 0) { #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_BADMANAGEDPATH; } @@ -870,7 +870,7 @@ dyad_rc_t dyad_produce (dyad_ctx_t* ctx, const char* fname) // the producer operation #if HAVE_CALIPER rc = dyad_commit (ctx, fname); - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; return rc; #else return dyad_commit (ctx, fname); @@ -880,14 +880,14 @@ dyad_rc_t dyad_produce (dyad_ctx_t* ctx, const char* fname) dyad_rc_t dyad_consume (dyad_ctx_t* ctx, const char* fname) { #if HAVE_CALIPER - CALI_MARK_FUNCTION + CALI_MARK_FUNCTION_BEGIN; #endif dyad_rc_t rc = DYAD_RC_OK; // If the context is not defined, then it is not valid. // So, return DYAD_NOCTX if (!ctx || !ctx->h) { #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_NOCTX; } @@ -895,7 +895,7 @@ dyad_rc_t dyad_consume (dyad_ctx_t* ctx, const char* fname) // valid for a consumer operation. 
So, return DYAD_BADMANAGEDPATH if (ctx->cons_managed_path == NULL || strlen (ctx->cons_managed_path) == 0) { #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_BADMANAGEDPATH; } @@ -942,7 +942,7 @@ consume_done:; // Set reenter to true to allow additional intercepting ctx->reenter = true; #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return rc; } @@ -950,11 +950,11 @@ consume_done:; int dyad_finalize (dyad_ctx_t** ctx) { #if HAVE_CALIPER - CALI_MARK_FUNCTION + CALI_MARK_FUNCTION_BEGIN; #endif if (ctx == NULL || *ctx == NULL) { #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_OK; } @@ -978,7 +978,7 @@ int dyad_finalize (dyad_ctx_t** ctx) free (*ctx); *ctx = NULL; #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_OK; } @@ -990,7 +990,7 @@ __attribute__((annotate("@critical_path()"))) int dyad_sync_directory(dyad_ctx_t* restrict ctx, const char* restrict path) { // Flush new directory entry https://lwn.net/Articles/457671/ #if HAVE_CALIPER - CALI_MARK_FUNCTION + CALI_MARK_FUNCTION_BEGIN; #endif char path_copy[PATH_MAX + 1]; int odir_fd = -1; @@ -1022,7 +1022,7 @@ int dyad_sync_directory(dyad_ctx_t* restrict ctx, const char* restrict path) if (ctx != NULL) ctx->reenter = reenter; #if HAVE_CALIPER - CALI_MARK_FUNCTION_END + CALI_MARK_FUNCTION_END; #endif return rc; } diff --git a/src/dtl/dyad_dtl_impl.c b/src/dtl/dyad_dtl_impl.c index 1f4a1e80..615b38e2 100644 --- a/src/dtl/dyad_dtl_impl.c +++ b/src/dtl/dyad_dtl_impl.c @@ -13,13 +13,13 @@ dyad_rc_t dyad_dtl_init (dyad_dtl_t **dtl_handle, bool debug) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif dyad_rc_t rc = DYAD_RC_OK; *dtl_handle = malloc (sizeof (struct dyad_dtl)); if (*dtl_handle == NULL) { #if HAVE_CALIPER - CALI_MARK_FUNCTION_FUNCTION_END; + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_SYSFAIL; } @@ -28,29 +28,29 @@ dyad_rc_t 
dyad_dtl_init (dyad_dtl_t **dtl_handle, rc = dyad_dtl_ucx_init (*dtl_handle, mode, h, debug); if (DYAD_IS_ERROR (rc)) { #if HAVE_CALIPER - CALI_MARK_FUNCTION_FUNCTION_END; + CALI_MARK_FUNCTION_END; #endif return rc; } #if HAVE_CALIPER - CALI_MARK_FUNCTION_FUNCTION_END; + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_OK; } else if (mode == DYAD_DTL_FLUX_RPC) { rc = dyad_dtl_flux_init (*dtl_handle, mode, h, debug); if (DYAD_IS_ERROR (rc)) { #if HAVE_CALIPER - CALI_MARK_FUNCTION_FUNCTION_END; + CALI_MARK_FUNCTION_END; #endif return rc; } #if HAVE_CALIPER - CALI_MARK_FUNCTION_FUNCTION_END; + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_OK; } #if HAVE_CALIPER - CALI_MARK_FUNCTION_FUNCTION_END; + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_BADDTLMODE; } @@ -58,7 +58,7 @@ dyad_rc_t dyad_dtl_init (dyad_dtl_t **dtl_handle, dyad_rc_t dyad_dtl_finalize (dyad_dtl_t **dtl_handle) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif dyad_rc_t rc = DYAD_RC_OK; if (dtl_handle == NULL || *dtl_handle == NULL) @@ -67,7 +67,7 @@ dyad_rc_t dyad_dtl_finalize (dyad_dtl_t **dtl_handle) // this function should be treated as a no-op, and we // should return DYAD_RC_OK to indicate no error has occured #if HAVE_CALIPER - CALI_MARK_FUNCTION_FUNCTION_END; + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_OK; if ((*dtl_handle)->mode == DYAD_DTL_UCX) { @@ -75,7 +75,7 @@ dyad_rc_t dyad_dtl_finalize (dyad_dtl_t **dtl_handle) rc = dyad_dtl_ucx_finalize (dtl_handle); if (DYAD_IS_ERROR (rc)) { #if HAVE_CALIPER - CALI_MARK_FUNCTION_FUNCTION_END; + CALI_MARK_FUNCTION_END; #endif return rc; } @@ -85,21 +85,21 @@ dyad_rc_t dyad_dtl_finalize (dyad_dtl_t **dtl_handle) rc = dyad_dtl_flux_finalize (dtl_handle); if (DYAD_IS_ERROR (rc)) { #if HAVE_CALIPER - CALI_MARK_FUNCTION_FUNCTION_END; + CALI_MARK_FUNCTION_END; #endif return rc; } } } else { #if HAVE_CALIPER - CALI_MARK_FUNCTION_FUNCTION_END; + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_BADDTLMODE; } free (*dtl_handle); 
*dtl_handle = NULL; #if HAVE_CALIPER - CALI_MARK_FUNCTION_FUNCTION_END; + CALI_MARK_FUNCTION_END; #endif return DYAD_RC_OK; } diff --git a/src/dtl/flux_dtl.c b/src/dtl/flux_dtl.c index cba4bb59..c3e2f93d 100644 --- a/src/dtl/flux_dtl.c +++ b/src/dtl/flux_dtl.c @@ -10,7 +10,7 @@ dyad_rc_t dyad_dtl_flux_init (dyad_dtl_t* self, bool debug) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif self->private.flux_dtl_handle = malloc (sizeof (struct dyad_dtl_flux)); if (self->private.flux_dtl_handle == NULL) { @@ -46,7 +46,7 @@ dyad_rc_t dyad_dtl_flux_rpc_pack (dyad_dtl_t* restrict self, json_t** restrict packed_obj) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif dyad_dtl_flux_t* dtl_handle = self->private.flux_dtl_handle; *packed_obj = json_pack ("{s:s}", "upath", upath); @@ -68,7 +68,7 @@ dyad_rc_t dyad_dtl_flux_rpc_unpack (dyad_dtl_t* self, char** upath) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif int rc = 0; rc = flux_request_unpack (msg, NULL, "{s:s}", "upath", upath); @@ -108,7 +108,7 @@ dyad_rc_t dyad_dtl_flux_establish_connection (dyad_dtl_t* self, dyad_rc_t dyad_dtl_flux_send (dyad_dtl_t* self, void* buf, size_t buflen) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif int errcode = 0; FLUX_LOG_INFO (self->private.flux_dtl_handle->h, @@ -139,7 +139,7 @@ dyad_rc_t dyad_dtl_flux_send (dyad_dtl_t* self, void* buf, size_t buflen) dyad_rc_t dyad_dtl_flux_recv (dyad_dtl_t* self, void** buf, size_t* buflen) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif int rc = 0; errno = 0; diff --git a/src/dtl/ucx_dtl.c b/src/dtl/ucx_dtl.c index 3c036e7c..a5c05dd4 100644 --- a/src/dtl/ucx_dtl.c +++ b/src/dtl/ucx_dtl.c @@ -74,7 +74,7 @@ static ucs_status_t dyad_ucx_request_wait (dyad_dtl_ucx_t* dtl_handle, dyad_ucx_request_t* request) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif ucs_status_t final_request_status = 
UCS_OK; // If 'request' is actually a request handle, this means the communication @@ -93,7 +93,6 @@ static ucs_status_t dyad_ucx_request_wait (dyad_dtl_ucx_t* dtl_handle, final_request_status = ucp_request_check_status (request); } while (final_request_status == UCS_INPROGRESS); // Free and deallocate the request object -<<<<<<< HEAD ucp_request_free (request); #if HAVE_CALIPER CALI_MARK_FUNCTION_END; @@ -124,7 +123,7 @@ dyad_rc_t dyad_dtl_ucx_init (dyad_dtl_t* self, bool debug) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif ucp_params_t ucx_params; ucp_worker_params_t worker_params; @@ -259,7 +258,7 @@ dyad_rc_t dyad_dtl_ucx_rpc_pack (dyad_dtl_t* restrict self, json_t** restrict packed_obj) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif size_t enc_len = 0; char* enc_buf = NULL; @@ -350,7 +349,7 @@ dyad_rc_t dyad_dtl_ucx_rpc_unpack (dyad_dtl_t* self, char** upath) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif char* enc_addr = NULL; size_t enc_addr_len = 0; @@ -426,7 +425,7 @@ dyad_rc_t dyad_dtl_ucx_establish_connection (dyad_dtl_t* self, dyad_dtl_comm_mode_t comm_mode) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif ucp_ep_params_t params; ucs_status_t status = UCS_OK; @@ -480,7 +479,7 @@ dyad_rc_t dyad_dtl_ucx_establish_connection (dyad_dtl_t* self, dyad_rc_t dyad_dtl_ucx_send (dyad_dtl_t* self, void* buf, size_t buflen) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif ucs_status_ptr_t stat_ptr; ucs_status_t status = UCS_OK; @@ -540,7 +539,7 @@ dyad_rc_t dyad_dtl_ucx_send (dyad_dtl_t* self, void* buf, size_t buflen) dyad_rc_t dyad_dtl_ucx_recv (dyad_dtl_t* self, void** buf, size_t* buflen) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif ucs_status_t status = UCS_OK; ucp_tag_message_h msg = NULL; @@ -675,7 +674,7 @@ dyad_rc_t dyad_dtl_ucx_recv (dyad_dtl_t* self, void** buf, size_t* buflen) 
dyad_rc_t dyad_dtl_ucx_close_connection (dyad_dtl_t* self) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif ucs_status_t status = UCS_OK; ucs_status_ptr_t stat_ptr; @@ -768,7 +767,7 @@ dyad_rc_t dyad_dtl_ucx_close_connection (dyad_dtl_t* self) dyad_rc_t dyad_dtl_ucx_finalize (dyad_dtl_t** self) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif dyad_dtl_ucx_t* dtl_handle = NULL; dyad_rc_t rc = DYAD_RC_OK; diff --git a/src/modules/dyad.c b/src/modules/dyad.c index 94154b01..c1b2b507 100644 --- a/src/modules/dyad.c +++ b/src/modules/dyad.c @@ -61,7 +61,7 @@ static void dyad_mod_fini (void) __attribute__ ((destructor)); void dyad_mod_fini (void) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif flux_t *h = flux_open (NULL, 0); @@ -75,7 +75,7 @@ void dyad_mod_fini (void) static void freectx (void *arg) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif dyad_mod_ctx_t *ctx = (dyad_mod_ctx_t *)arg; flux_msg_handler_delvec (ctx->handlers); @@ -92,7 +92,7 @@ static void freectx (void *arg) static dyad_mod_ctx_t *getctx (flux_t *h) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif dyad_mod_ctx_t *ctx = (dyad_mod_ctx_t *)flux_aux_get (h, "dyad"); @@ -139,7 +139,7 @@ dyad_fetch_request_cb (flux_t *h, void *arg) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif FLUX_LOG_INFO (h, "Launched callback for dyad.fetch\n"); dyad_mod_ctx_t *ctx = getctx (h); @@ -248,7 +248,7 @@ dyad_fetch_request_cb (flux_t *h, static dyad_rc_t dyad_open (flux_t *h, dyad_dtl_mode_t dtl_mode, bool debug) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif dyad_mod_ctx_t *ctx = getctx (h); dyad_rc_t rc = 0; @@ -292,7 +292,7 @@ void usage () DYAD_DLL_EXPORTED int mod_main (flux_t *h, int argc, char **argv) { #if HAVE_CALIPER - CALI_MARK_FUNCTION; + CALI_MARK_FUNCTION_BEGIN; #endif const mode_t m = (S_IRWXU | S_IRWXG | S_IROTH | 
S_IXOTH | S_ISGID); dyad_mod_ctx_t *ctx = NULL; From f4672a3231ab99115f64738c678c0d70b1f75a6e Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Fri, 28 Jul 2023 14:06:12 -0700 Subject: [PATCH 15/18] Adds CALIPER_CFLAGS to CXXFLAGS so that we can use Caliper from C++ code --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index 3f6521e6..59c9e967 100644 --- a/configure.ac +++ b/configure.ac @@ -143,6 +143,7 @@ fi if test "$enable_caliper" = "yes"; then PKG_CHECK_MODULES([CALIPER], [caliper], [], []) CFLAGS="${CFLAGS} ${CALIPER_CFLAGS} " + CXXFLAGS="${CXXFLAGS} ${CALIPER_CFLAGS} " # Do not use CALIPER_LIBS, only link to libcaliper-stub LIBS="${LIBS} ${CALIPER_LIBS} -lrt " AC_DEFINE([HAVE_CALIPER], [1], [Define if you have libcaliper]) From d2d277f2ab75321e7d7e9c31b473ea236438b6f7 Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Fri, 28 Jul 2023 14:10:58 -0700 Subject: [PATCH 16/18] Adds extern C to dyad_flux_log.h --- src/dtl/dyad_flux_log.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/dtl/dyad_flux_log.h b/src/dtl/dyad_flux_log.h index da196ef9..8cdc5b2a 100644 --- a/src/dtl/dyad_flux_log.h +++ b/src/dtl/dyad_flux_log.h @@ -1,6 +1,10 @@ #ifndef DYAD_CORE_DYAD_FLUX_LOG_H #define DYAD_CORE_DYAD_FLUX_LOG_H +#ifdef __cplusplus +extern "C" { +#endif + #include #ifdef __cplusplus @@ -26,6 +30,10 @@ extern "C" { #define FLUX_LOG_INFO(flux_ctx, ...) flux_log (flux_ctx, LOG_INFO, __VA_ARGS__) #define FLUX_LOG_ERR(flux_ctx, ...) 
flux_log_error (flux_ctx, __VA_ARGS__) #endif + +#ifdef __cplusplus +} +#endif #ifdef __cplusplus } From 46062310a6d650f310232f7e5a6160cb87bdf09d Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Fri, 28 Jul 2023 14:23:47 -0700 Subject: [PATCH 17/18] Tries to fix some extern C stuff --- src/core/dyad_core.h | 6 ++++++ src/dtl/dyad_flux_log.h | 6 +----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/core/dyad_core.h b/src/core/dyad_core.h index 47cd1961..d4cd6775 100644 --- a/src/core/dyad_core.h +++ b/src/core/dyad_core.h @@ -16,6 +16,12 @@ #include #endif +#include "dyad_envs.h" +#include "dyad_rc.h" +// Includes +#include "dyad_flux_log.h" +#include "dyad_dtl.h" + /***************************************************************************** * * * DYAD Macro Definitions * diff --git a/src/dtl/dyad_flux_log.h b/src/dtl/dyad_flux_log.h index 8cdc5b2a..a0a9f448 100644 --- a/src/dtl/dyad_flux_log.h +++ b/src/dtl/dyad_flux_log.h @@ -1,10 +1,6 @@ #ifndef DYAD_CORE_DYAD_FLUX_LOG_H #define DYAD_CORE_DYAD_FLUX_LOG_H -#ifdef __cplusplus -extern "C" { -#endif - #include #ifdef __cplusplus @@ -30,7 +26,7 @@ extern "C" { #define FLUX_LOG_INFO(flux_ctx, ...) flux_log (flux_ctx, LOG_INFO, __VA_ARGS__) #define FLUX_LOG_ERR(flux_ctx, ...) flux_log_error (flux_ctx, __VA_ARGS__) #endif - + #ifdef __cplusplus } #endif From f452dbb5d04e06827edee85b49b36e1e232641f5 Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Fri, 28 Jul 2023 14:39:21 -0700 Subject: [PATCH 18/18] More attempts to fix include errors --- src/stream/dyad_stream_core.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/stream/dyad_stream_core.cpp b/src/stream/dyad_stream_core.cpp index 60e5e620..5dabade3 100644 --- a/src/stream/dyad_stream_core.cpp +++ b/src/stream/dyad_stream_core.cpp @@ -38,6 +38,7 @@ using namespace std; // std::clock () #include #endif + namespace dyad { /*****************************************************************************