# Communication Initialization with the MSCCL++ API

In this tutorial, you will write a simple program to initialize communication between eight GPUs using the MSCCL++ C++ API. You will also learn how to use the Python API to initialize communication.

## Prerequisites

A system with eight GPUs is required to run this tutorial.

Also make sure that you have installed MSCCL++ on your system. If not, please follow the [quick start](../quickstart.md).

## Initialize Communication with C++ API

We will set up a mesh topology with eight GPUs. Each GPU will be connected to every other GPU. The following code shows how to initialize communication with the MSCCL++ C++ API.

```cpp
#include <mscclpp/core.hpp>
#include <mscclpp/gpu_utils.hpp>
#include <mscclpp/port_channel.hpp>

#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

template <class T>
using DeviceHandle = mscclpp::DeviceHandle<T>;

// Device-side table of port channel handles. Entry i belongs to the i-th peer
// in ascending rank order, skipping the local rank.
__constant__ DeviceHandle<mscclpp::PortChannel> constPortChans[8];

/// Connects this rank to every other rank and publishes one port channel per
/// peer in `constPortChans`.
///
/// @param rank       Rank of the calling process.
/// @param worldsize  Total number of ranks taking part.
/// @param data       Buffer that is registered and shared with every peer.
/// @param dataSize   Size of `data` in bytes.
/// @throws std::runtime_error if there are more peers than `constPortChans`
///         has entries.
///
/// NOTE(review): `comm` and `proxyService` are locals of this function; a real
/// program presumably has to keep them alive (and start the proxy service)
/// for as long as kernels use the channels -- confirm against the MSCCL++
/// documentation.
void setupMeshTopology(int rank, int worldsize, void* data, size_t dataSize) {
  // Address used by the TCP bootstrap; adjust it for your cluster.
  std::string ip_port = "10.0.0.4:50000";
  auto bootstrap = std::make_shared<mscclpp::TcpBootstrap>(rank, worldsize);
  bootstrap->initialize(ip_port);
  mscclpp::Communicator comm(bootstrap);
  mscclpp::ProxyService proxyService;

  // `connections` is indexed by rank. The other three vectors hold one entry
  // per peer, appended in ascending rank order, so index i refers to the same
  // peer in each of them.
  std::vector<mscclpp::SemaphoreId> semaphoreIds;
  std::vector<mscclpp::RegisteredMemory> localMemories;
  std::vector<std::shared_future<std::shared_ptr<mscclpp::Connection>>> connections(worldsize);
  std::vector<std::shared_future<mscclpp::RegisteredMemory>> remoteMemories;

  // Every connection in this example uses CUDA IPC, so the buffer is
  // registered for that transport only.
  const mscclpp::Transport transport = mscclpp::Transport::CudaIpc;

  for (int r = 0; r < worldsize; ++r) {
    if (r == rank) continue;
    // Connect with all other ranks
    connections[r] = comm.connect(transport, r);
    auto memory = comm.registerMemory(data, dataSize, transport);
    localMemories.push_back(memory);
    // Exchange registered-memory handles with the peer.
    comm.sendMemory(memory, r);
    remoteMemories.push_back(comm.recvMemory(r));
  }

  for (int r = 0; r < worldsize; ++r) {
    if (r == rank) continue;
    auto sema = comm.buildSemaphore(connections[r].get(), r).get();
    semaphoreIds.push_back(proxyService.addSemaphore(sema));
  }

  std::vector<DeviceHandle<mscclpp::PortChannel>> portChannels;
  for (size_t i = 0; i < semaphoreIds.size(); ++i) {
    portChannels.push_back(mscclpp::deviceHandle(mscclpp::PortChannel(
        proxyService.portChannel(semaphoreIds[i]), proxyService.addMemory(remoteMemories[i].get()),
        proxyService.addMemory(localMemories[i]))));
  }

  // The constant-memory table has a fixed size; refuse to overflow it.
  if (portChannels.size() > sizeof(constPortChans) / sizeof(DeviceHandle<mscclpp::PortChannel>)) {
    throw std::runtime_error("unexpected error");
  }
  MSCCLPP_CUDATHROW(cudaMemcpyToSymbol(constPortChans, portChannels.data(),
                                       sizeof(DeviceHandle<mscclpp::PortChannel>) * portChannels.size()));
}
```