Skip to content

Commit

Permalink
Add host hashing unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
chhwang committed Jun 16, 2023
1 parent 92d5814 commit 2b7da92
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 15 deletions.
3 changes: 1 addition & 2 deletions src/communicator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,7 @@ MSCCLPP_API_CPP std::shared_ptr<Connection> Communicator::connectOnSetup(int rem
if (pimpl->rankToHash_[remoteRank] != pimpl->rankToHash_[pimpl->bootstrap_->getRank()]) {
std::stringstream ss;
ss << "Cuda IPC connection can only be made within a node: " << remoteRank << "(" << std::hex
<< pimpl->rankToHash_[pimpl->bootstrap_->getRank()] << ")"
<< " != " << pimpl->bootstrap_->getRank() << "(" << std::hex
<< pimpl->rankToHash_[remoteRank] << ") != " << pimpl->bootstrap_->getRank() << "(" << std::hex
<< pimpl->rankToHash_[pimpl->bootstrap_->getRank()] << ")";
throw mscclpp::Error(ss.str(), ErrorCode::InvalidUsage);
}
Expand Down
6 changes: 2 additions & 4 deletions src/utils_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,20 @@

#include "debug.h"

namespace {
constexpr char HOSTID_FILE[32] = "/proc/sys/kernel/random/boot_id";

// Compares `string` against `ref` using strncmp and returns true on a match.
// When matchExact is true the compared length is strlen(string) + 1, so the
// terminating '\0' takes part in the comparison and the two strings must be
// identical. Otherwise only the first strlen(ref) characters are compared.
bool matchIf(const char* string, const char* ref, bool matchExact) {
static bool matchIf(const char* string, const char* ref, bool matchExact) {
// Make sure to include '\0' in the exact case
int matchLen = matchExact ? strlen(string) + 1 : strlen(ref);
return strncmp(string, ref, matchLen) == 0;
}

// Returns true when either port is -1 or when both ports are equal;
// returns false in every other case.
bool matchPort(const int port1, const int port2) {
static bool matchPort(const int port1, const int port2) {
if (port1 == -1) return true;
if (port2 == -1) return true;
if (port1 == port2) return true;
return false;
}
} // namespace

namespace mscclpp {
std::string int64ToBusId(int64_t id) {
Expand Down
2 changes: 1 addition & 1 deletion test/mp_unit/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Source files added to the mp_unit_tests target. PRIVATE keeps them out of the
# target's usage requirements, so they are not propagated to dependents.
target_sources(mp_unit_tests PRIVATE
mp_unit_tests.cc
bootstrap_tests.cc
ib_tests.cu
communicator_tests.cu
device_channel_tests.cu
direct_channel_tests.cu
mp_unit_tests.cc
)
4 changes: 0 additions & 4 deletions test/mp_unit/communicator_tests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,6 @@ void CommunicatorTestBase::TearDown() {

void CommunicatorTestBase::setNumRanksToUse(int num) { numRanksToUse = num; }

// Returns the remainder of `rank` divided by the per-node rank count held in gEnv.
int CommunicatorTestBase::rankToLocalRank(int rank) const {
  const int ranksPerNode = gEnv->nRanksPerNode;
  return rank % ranksPerNode;
}

// Returns the integer quotient of `rank` divided by the per-node rank count held in gEnv.
int CommunicatorTestBase::rankToNode(int rank) const {
  const int ranksPerNode = gEnv->nRanksPerNode;
  return rank / ranksPerNode;
}

void CommunicatorTestBase::connectMesh(bool useIbOnly) {
for (int i = 0; i < numRanksToUse; i++) {
if (i != gEnv->rank) {
Expand Down
49 changes: 49 additions & 0 deletions test/mp_unit/mp_unit_tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,24 @@

#include <mpi.h>

#include <cstring>
#include <sstream>

#include "utils_internal.hpp"

const char gDefaultIpPort[] = "127.0.0.1:50053";
MultiProcessTestEnv* gEnv = nullptr;

// Maps a global rank to its index within a node: rank modulo gEnv->nRanksPerNode.
// Throws std::runtime_error if the global test environment has not been set yet.
int rankToLocalRank(int rank) {
  if (!gEnv) {
    throw std::runtime_error("rankToLocalRank is called before gEnv is initialized");
  }
  const int ranksPerNode = gEnv->nRanksPerNode;
  return rank % ranksPerNode;
}

// Maps a global rank to a node index: rank divided (integer division) by
// gEnv->nRanksPerNode.
// Throws std::runtime_error if the global test environment has not been set yet.
int rankToNode(int rank) {
  if (!gEnv) {
    throw std::runtime_error("rankToNode is called before gEnv is initialized");
  }
  const int ranksPerNode = gEnv->nRanksPerNode;
  return rank / ranksPerNode;
}

mscclpp::Transport ibIdToTransport(int id) {
mscclpp::Transport IBs[] = {mscclpp::Transport::IB0, mscclpp::Transport::IB1, mscclpp::Transport::IB2,
mscclpp::Transport::IB3, mscclpp::Transport::IB4, mscclpp::Transport::IB5,
Expand Down Expand Up @@ -81,3 +94,39 @@ TEST_F(MultiProcessTest, Prelim) {
// Test to make sure the MPI environment is set up correctly
ASSERT_GE(gEnv->worldSize, 2);
}
#include <iostream>
// Gathers the host name reported by every rank and checks it against the node
// layout given by rankToNode(): ranks on the same node as this rank must report
// the same name, ranks on a different node must report a different one.
TEST_F(MultiProcessTest, HostName) {
  const size_t maxNameLen = 1024;
  // One fixed-size, zero-filled slot of maxNameLen bytes per rank.
  std::vector<char> buffer(gEnv->worldSize * maxNameLen, '\0');
  std::string hostName = mscclpp::getHostName(maxNameLen, '\0');
  // Copy hostName to this rank's slot, never writing past the end of the slot.
  const size_t copyLen = hostName.size() < maxNameLen ? hostName.size() : maxNameLen;
  memcpy(buffer.data() + gEnv->rank * maxNameLen, hostName.c_str(), copyLen);

  // In-place gather: each rank contributes the slot it has just filled.
  MPI_Allgather(MPI_IN_PLACE, 0, MPI_BYTE, buffer.data(), maxNameLen, MPI_BYTE, MPI_COMM_WORLD);

  for (int rank = 0; rank < gEnv->worldSize; rank++) {
    char rankHostName[maxNameLen + 1];
    strncpy(rankHostName, buffer.data() + rank * maxNameLen, maxNameLen);
    // strncpy does not append a terminator when the slot holds maxNameLen
    // non-NUL bytes, so terminate explicitly before building a std::string.
    rankHostName[maxNameLen] = '\0';
    if (rankToNode(rank) == rankToNode(gEnv->rank)) {
      ASSERT_EQ(std::string(rankHostName), hostName);
    } else {
      ASSERT_NE(std::string(rankHostName), hostName);
    }
  }
}

// Gathers the host hash computed by every rank and checks it against the node
// layout given by rankToNode(): equal hashes for ranks on this rank's node,
// different hashes for ranks on any other node.
TEST_F(MultiProcessTest, HostHash) {
  const uint64_t hostHash = mscclpp::getHostHash();

  // One slot per rank; only this rank's slot is filled before the in-place gather.
  std::vector<uint64_t> buffer(gEnv->worldSize, 0);
  buffer[gEnv->rank] = hostHash;
  MPI_Allgather(MPI_IN_PLACE, 0, MPI_BYTE, buffer.data(), sizeof(uint64_t), MPI_BYTE, MPI_COMM_WORLD);

  for (int rank = 0; rank < gEnv->worldSize; ++rank) {
    const bool sameNode = (rankToNode(rank) == rankToNode(gEnv->rank));
    if (!sameNode) {
      ASSERT_NE(buffer[rank], hostHash);
      continue;
    }
    ASSERT_EQ(buffer[rank], hostHash);
  }
}
8 changes: 4 additions & 4 deletions test/mp_unit/mp_unit_tests.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ class MultiProcessTestEnv : public ::testing::Environment {

extern MultiProcessTestEnv* gEnv;

mscclpp::Transport ibIdToTransport(int id);
int rankToLocalRank(int rank);
int rankToNode(int rank);

class MultiProcessTest : public ::testing::Test {
protected:
void TearDown() override;
Expand Down Expand Up @@ -77,16 +81,12 @@ class IbPeerToPeerTest : public IbTestBase {
std::array<mscclpp::IbMrInfo, 2> mrInfo;
};

mscclpp::Transport ibIdToTransport(int id);

class CommunicatorTestBase : public MultiProcessTest {
protected:
void SetUp() override;
void TearDown() override;

void setNumRanksToUse(int num);
int rankToLocalRank(int rank) const;
int rankToNode(int rank) const;
void connectMesh(bool useIbOnly = false);

// Register a local memory and receive corresponding remote memories
Expand Down
1 change: 1 addition & 0 deletions test/unit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ target_sources(unit_tests PRIVATE
numa_tests.cc
socket_tests.cc
utils_tests.cc
utils_internal_tests.cc
)
17 changes: 17 additions & 0 deletions test/unit/utils_internal_tests.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include <gtest/gtest.h>

#include <thread>

#include "utils_internal.hpp"

// Checks that getHostHash() returns the same value when called from the test's
// own thread and from a separately spawned thread.
TEST(UtilsInternalTest, getHostHash) {
  // Value computed on the calling thread.
  uint64_t hash1 = mscclpp::getHostHash();

  // Value computed on a worker thread; written through the captured reference.
  uint64_t hash2;
  auto computeOnWorker = [&hash2]() { hash2 = mscclpp::getHostHash(); };
  std::thread th(computeOnWorker);

  ASSERT_TRUE(th.joinable());
  // hash2 is only read after the worker has finished.
  th.join();

  EXPECT_EQ(hash1, hash2);
}

0 comments on commit 2b7da92

Please sign in to comment.