// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

// Shared declarations for the multi-process unit tests: the test environment, a few rank helper functions, and
// the test fixture classes. No member function is defined in this header; only declarations and in-class
// member initializers appear here.

#ifndef MSCCLPP_MP_UNIT_TESTS_HPP_
#define MSCCLPP_MP_UNIT_TESTS_HPP_

// NOTE(review): the targets of the seven #include directives below are missing in this copy of the file.
// Several template argument lists further down (on std::shared_ptr, std::vector, std::unordered_map, std::array,
// std::function and the DeviceHandle alias) are missing as well — presumably every angle-bracket span was
// stripped. Restore them from version control before building; nothing here has been guessed.
#include
#include
#include
#include
#include
#include
#include

#include "../framework.hpp"
#include "ib.hpp"
#include "utils_internal.hpp"

// Skip the current test if IBVerbs is not available in this build
// When USE_IBVERBS is defined the macro expands to nothing; otherwise it expands to a SKIP_TEST() expression
// streaming the explanatory message.
#if defined(USE_IBVERBS)
#define REQUIRE_IBVERBS
#else
#define REQUIRE_IBVERBS SKIP_TEST() << "This test requires IBVerbs that the current build does not support."
#endif

// Test environment derived from ::mscclpp::test::Environment. It stores the command-line arguments it was
// constructed with plus per-process rank information as public data members.
class MultiProcessTestEnv : public ::mscclpp::test::Environment {
 public:
  MultiProcessTestEnv(int argc, const char** argv);

  // NOTE(review): not marked `override`; confirm whether the base class declares these as virtual.
  void SetUp();
  void TearDown();

  const int argc;
  const char** argv;
  int rank;
  int worldSize;
  int nRanksPerNode;
  // NOTE(review): key/value types are missing in this copy — presumably parsed command-line options; confirm.
  std::unordered_map args;
};

// Pointer to the test environment instance; declared here, defined in another translation unit.
extern MultiProcessTestEnv* gEnv;

// Helper functions converting an integer id / rank into a transport, a local rank, or a node index.
// The exact mapping is defined by the implementation, not by this header.
mscclpp::Transport ibIdToTransport(int id);
int rankToLocalRank(int rank);
int rankToNode(int rank);

// Common base of every multi-process fixture; overrides only TearDown().
class MultiProcessTest : public ::mscclpp::test::TestCase {
 protected:
  void TearDown() override;
};

// Fixture declaring bootstrap test routines. Each routine takes the bootstrap object under test.
class BootstrapTest : public MultiProcessTest {
 protected:
  // NOTE(review): the pointee type of these std::shared_ptr parameters is missing in this copy.
  void bootstrapTestAllGather(std::shared_ptr bootstrap);
  void bootstrapTestBarrier(std::shared_ptr bootstrap);
  void bootstrapTestSendRecv(std::shared_ptr bootstrap);
  void bootstrapTestAll(std::shared_ptr bootstrap);

  // Each test case should finish within 30 seconds.
  mscclpp::Timer bootstrapTestTimer{30};
};

// Base fixture for IB tests; holds CUDA device numbers and the IB device name used by derived fixtures.
class IbTestBase : public MultiProcessTest {
 protected:
  void SetUp() override;

  int cudaDevNum;
  int cudaDevId;
  std::string ibDevName;
};

// IB fixture declaring buffer registration/connection and helpers that stage write, atomic-add and
// write-with-immediate work requests.
class IbPeerToPeerTest : public IbTestBase {
 protected:
  void SetUp() override;

  void registerBufferAndConnect(void* buf, size_t size);

  void stageSendWrite(uint32_t size, uint64_t wrId, uint64_t srcOffset, uint64_t dstOffset, bool signaled);

  void stageSendAtomicAdd(uint64_t wrId, uint64_t dstOffset, uint64_t addVal, bool signaled);

  void stageSendWriteWithImm(uint32_t size, uint64_t wrId, uint64_t srcOffset, uint64_t dstOffset, bool signaled,
                             unsigned int immData);

  // NOTE(review): pointee types and std::array element type/extent are missing in this copy.
  std::shared_ptr bootstrap;
  std::shared_ptr ibCtx;
  std::shared_ptr qp;
  std::shared_ptr mr;
  size_t bufSize;

  std::array qpInfo;
  std::array mrInfo;
};

// Base fixture for communicator-based tests: declares mesh connection setup and memory registration helpers,
// and holds the communicator, the registered memories and the connection maps.
class CommunicatorTestBase : public MultiProcessTest {
 protected:
  void SetUp() override;
  void TearDown() override;

  void setNumRanksToUse(int num);
  // By default IPC and IB are enabled and Ethernet is disabled.
  void connectMesh(bool useIpc = true, bool useIb = true, bool useEthernet = false);

  // Register a local memory and receive corresponding remote memories
  // Results are returned through the `localMemory` and `remoteMemories` reference parameters.
  void registerMemoryPairs(void* buff, size_t buffSize, mscclpp::TransportFlags transport, int tag,
                           const std::vector& remoteRanks, mscclpp::RegisteredMemory& localMemory,
                           std::unordered_map& remoteMemories);
  // Register a local memory and receive one corresponding remote memory
  // Results are returned through the `localMemory` and `remoteMemory` reference parameters.
  void registerMemoryPair(void* buff, size_t buffSize, mscclpp::TransportFlags transport, int tag, int remoteRank,
                          mscclpp::RegisteredMemory& localMemory, mscclpp::RegisteredMemory& remoteMemory);

  // Initialized to -1; presumably meaning "not set" until setNumRanksToUse() is called — TODO confirm.
  int numRanksToUse = -1;
  std::shared_ptr communicator;
  mscclpp::Transport ibTransport;
  // NOTE(review): element / key / mapped types of the containers below are missing in this copy.
  std::vector registeredMemories;
  std::unordered_map connections;
  std::unordered_map cpuConnections;
};

// Communicator fixture declaring device buffer initialization, a remote write routine and a correctness check.
class CommunicatorTest : public CommunicatorTestBase {
 protected:
  void SetUp() override;
  void TearDown() override;

  void deviceBufferInit();
  void writeToRemote(int dataCountPerRank);
  bool testWriteCorrectness(bool skipLocal = false);

  const size_t numBuffers = 10;
  // 1024 * 1024; the unit (bytes vs. elements) is not stated in this header — TODO confirm.
  const int deviceBufferSize = 1024 * 1024;
  // NOTE(review): element types of these vectors are missing in this copy.
  std::vector> devicePtr;
  std::vector localMemory;
  std::vector> remoteMemory;
};

// NOTE(review): the template parameter list and the template argument of this alias are missing in this copy.
template using DeviceHandle = mscclpp::DeviceHandle;

// Shorthand for the IB mode enumeration nested in mscclpp::EndpointConfig.
using IbMode = mscclpp::EndpointConfig::Ib::Mode;

// Fixture for one-to-one port channel tests: ping-pong, packet ping-pong, their perf variants, and bandwidth.
class PortChannelOneToOneTest : public CommunicatorTestBase {
 protected:
  // Options bundle passed by value to the ping-pong and bandwidth tests.
  struct PingPongTestParams {
    bool useIPC;
    bool useIB;
    bool useEthernet;
    bool waitWithPoll;
    IbMode ibMode;
  };

  void SetUp() override;
  void TearDown() override;

  // The receive buffer is optional (defaults to nullptr / 0 bytes); ibMode defaults to IbMode::Default.
  // NOTE(review): the element type of `portChannels` is missing in this copy.
  void setupMeshConnections(std::vector& portChannels, bool useIPC, bool useIb, bool useEthernet, void* sendBuff,
                            size_t sendBuffBytes, void* recvBuff = nullptr, size_t recvBuffBytes = 0,
                            IbMode ibMode = IbMode::Default);
  void testPingPong(PingPongTestParams params);
  void testPingPongPerf(PingPongTestParams params);
  void testPacketPingPong(bool useIbOnly, IbMode ibMode = IbMode::Default);
  void testPacketPingPongPerf(bool useIbOnly, IbMode ibMode = IbMode::Default);
  void testBandwidth(PingPongTestParams params);

  std::shared_ptr proxyService;
};

// Fixture for one-to-one memory channel tests.
class MemoryChannelOneToOneTest : public CommunicatorTestBase {
 protected:
  void SetUp() override;
  void TearDown() override;

  // The output buffer is optional (defaults to nullptr / 0 bytes).
  // NOTE(review): the element type of `memoryChannels` is missing in this copy.
  void setupMeshConnections(std::vector& memoryChannels, void* inputBuff, size_t inputBuffBytes,
                            void* outputBuff = nullptr, size_t outputBuffBytes = 0);
  // NOTE(review): the call signature of this std::function alias is missing in this copy.
  using PacketPingPongKernelWrapper = std::function;
  void packetPingPongTest(const std::string testName, PacketPingPongKernelWrapper kernelWrapper);

  std::unordered_map> memorySemaphores;
};

// Fixture for semaphore performance tests; adds only SetUp()/TearDown() overrides.
class SemaphorePerfTest : public CommunicatorTestBase {
 protected:
  void SetUp() override;
  void TearDown() override;
};

// Fixture for switch channel tests; adds only SetUp()/TearDown() overrides.
class SwitchChannelTest : public CommunicatorTestBase {
 protected:
  void SetUp() override;
  void TearDown() override;
};

// Fixture for executor tests; holds the executor under test and a dump directory path.
class ExecutorTest : public MultiProcessTest {
 protected:
  void SetUp() override;
  void TearDown() override;

  std::shared_ptr executor;
  std::string npkitDumpDir;
};

#endif  // MSCCLPP_MP_UNIT_TESTS_HPP_