Skip to content

Commit

Permalink
coll/han: Implement MPI_Alltoallv in han using SMSC and XPMEM
Browse files Browse the repository at this point in the history
Extension of the previous han MPI_Alltoall algorithm, this change adds
MPI_Alltoallv to han for a hierarchy-aware algorithm which uses XPMEM
via the SMSC module in order to directly read data from ranks on the
same host.

This provides significant speed-up over the basic implementation when
small messages are used, as many messages can be coalesced and packed
into fewer sends.

Introduces MCA parameters:
 - coll_han_alltoallv_smsc_avg_send_limit
 - coll_han_alltoallv_smsc_noncontig_limit

Signed-off-by: Luke Robison <[email protected]>
  • Loading branch information
lrbison committed Aug 13, 2024
1 parent c3bebd8 commit 3b69d98
Show file tree
Hide file tree
Showing 9 changed files with 1,046 additions and 2 deletions.
1 change: 1 addition & 0 deletions ompi/mca/coll/han/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ coll_han.h \
coll_han_trigger.h \
coll_han_algorithms.h \
coll_han_alltoall.c \
coll_han_alltoallv.c \
coll_han_dynamic.h \
coll_han_dynamic_file.h \
coll_han_barrier.c \
Expand Down
20 changes: 20 additions & 0 deletions ompi/mca/coll/han/coll_han.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ typedef struct mca_coll_han_op_module_name_t {
mca_coll_han_op_up_low_module_name_t scatter;
mca_coll_han_op_up_low_module_name_t scatterv;
mca_coll_han_op_up_low_module_name_t alltoall;
mca_coll_han_op_up_low_module_name_t alltoallv;
} mca_coll_han_op_module_name_t;

/**
Expand Down Expand Up @@ -260,6 +261,11 @@ typedef struct mca_coll_han_component_t {
/* alltoall: parallel stages */
int32_t han_alltoall_pstages;

/* low level module for alltoallv */
uint32_t han_alltoallv_low_module;
int64_t han_alltoallv_smsc_avg_send_limit;
double han_alltoallv_smsc_noncontig_activation_limit;


/* name of the modules */
mca_coll_han_op_module_name_t han_op_module_name;
Expand All @@ -286,6 +292,8 @@ typedef struct mca_coll_han_component_t {

/* Define maximum dynamic errors printed by rank 0 with a 0 verbosity level */
int max_dynamic_errors;

opal_free_list_t pack_buffers;
} mca_coll_han_component_t;

/*
Expand All @@ -297,6 +305,7 @@ typedef struct mca_coll_han_single_collective_fallback_s
union
{
mca_coll_base_module_alltoall_fn_t alltoall;
mca_coll_base_module_alltoallv_fn_t alltoallv;
mca_coll_base_module_allgather_fn_t allgather;
mca_coll_base_module_allgatherv_fn_t allgatherv;
mca_coll_base_module_allreduce_fn_t allreduce;
Expand All @@ -319,6 +328,7 @@ typedef struct mca_coll_han_single_collective_fallback_s
typedef struct mca_coll_han_collectives_fallback_s
{
mca_coll_han_single_collective_fallback_t alltoall;
mca_coll_han_single_collective_fallback_t alltoallv;
mca_coll_han_single_collective_fallback_t allgather;
mca_coll_han_single_collective_fallback_t allgatherv;
mca_coll_han_single_collective_fallback_t allreduce;
Expand Down Expand Up @@ -384,6 +394,9 @@ OBJ_CLASS_DECLARATION(mca_coll_han_module_t);
#define previous_alltoall fallback.alltoall.alltoall
#define previous_alltoall_module fallback.alltoall.module

/* Shorthand member paths into the alltoallv entry of the fallback struct,
 * mirroring the alltoall and allgather pairs defined alongside. */
#define previous_alltoallv fallback.alltoallv.alltoallv
#define previous_alltoallv_module fallback.alltoallv.module

#define previous_allgather fallback.allgather.allgather
#define previous_allgather_module fallback.allgather.module

Expand Down Expand Up @@ -440,6 +453,7 @@ OBJ_CLASS_DECLARATION(mca_coll_han_module_t);
HAN_UNINSTALL_COLL_API(COMM, HANM, allgather); \
HAN_UNINSTALL_COLL_API(COMM, HANM, allgatherv); \
HAN_UNINSTALL_COLL_API(COMM, HANM, alltoall); \
HAN_UNINSTALL_COLL_API(COMM, HANM, alltoallv); \
han_module->enabled = false; /* entire module set to pass-through from now on */ \
} while(0)

Expand Down Expand Up @@ -503,6 +517,9 @@ int
mca_coll_han_alltoall_intra_dynamic(ALLTOALL_BASE_ARGS,
mca_coll_base_module_t *module);
/* Alltoallv member of the *_intra_dynamic prototype family declared here.
 * Takes the ALLTOALLV_BASE_ARGS argument list followed by the coll module.
 * NOTE(review): return value is presumably an OMPI status code -- confirm
 * against the definition. */
int
mca_coll_han_alltoallv_intra_dynamic(ALLTOALLV_BASE_ARGS,
mca_coll_base_module_t *module);
int
mca_coll_han_allgather_intra_dynamic(ALLGATHER_BASE_ARGS,
mca_coll_base_module_t *module);
int
Expand Down Expand Up @@ -566,4 +583,7 @@ static inline struct mca_smsc_endpoint_t *mca_coll_han_get_smsc_endpoint (struct
return (struct mca_smsc_endpoint_t *) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_SMSC];
}

/* Pack-buffer payload size in bytes: 128 * 1024 = 131072 (128 KiB).
 * NOTE(review): presumably the per-item payload size for the component's
 * pack_buffers free list -- confirm at the allocation site. */
#define COLL_HAN_PACKBUF_PAYLOAD_BYTES (128*1024)


#endif /* MCA_COLL_HAN_EXPORT_H */
4 changes: 4 additions & 0 deletions ompi/mca/coll/han/coll_han_algorithms.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ mca_coll_han_algorithm_value_t* mca_coll_han_available_algorithms[COLLCOUNT] =
{"smsc", (fnptr_t)&mca_coll_han_alltoall_using_smsc}, // 2-level
{ 0 }
},
[ALLTOALLV] = (mca_coll_han_algorithm_value_t[]){
{"smsc", (fnptr_t)&mca_coll_han_alltoallv_using_smsc}, // 2-level
{ 0 }
},
};

int
Expand Down
5 changes: 5 additions & 0 deletions ompi/mca/coll/han/coll_han_algorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,5 +214,10 @@ int
mca_coll_han_alltoall_using_smsc(ALLTOALL_BASE_ARGS,
mca_coll_base_module_t *module);

/* Alltoallv
 *
 * SMSC-based alltoallv algorithm. Takes the ALLTOALLV_BASE_ARGS argument
 * list followed by the coll module, matching the alltoall SMSC prototype
 * declared just above.
 * NOTE(review): return value is presumably an OMPI status code -- confirm
 * against the definition in coll_han_alltoallv.c. */
int
mca_coll_han_alltoallv_using_smsc(ALLTOALLV_BASE_ARGS,
mca_coll_base_module_t *module);


#endif
Loading

0 comments on commit 3b69d98

Please sign in to comment.