* In cases when the same `tag` is used for receiving data from the same
remote rank, #514 changed the behavior of `Communicator::connect` and
`Communicator::recvMemory` to receive data in the order in which
`std::shared_future::get()` is called, instead of the original behavior
of receiving data in the order of the method calls. Since the original
behavior is more intuitive, we restore it. Now when `get()` is called
on a future, the async function will first call `wait()` on the latest
previously returned future. In a recursive manner, this will call
`wait()` on all previous futures that are not yet ready.
* Removed all deprecated API calls and replaced them with the new ones.
This commit is contained in:
Changho Hwang
2025-05-13 13:43:35 -07:00
committed by GitHub
parent 5205618c4a
commit de664ad200
19 changed files with 178 additions and 156 deletions

View File

@@ -32,29 +32,25 @@ void setupMeshTopology(int rank, int worldsize, void* data, size_t dataSize) {
std::vector<mscclpp::SemaphoreId> semaphoreIds;
std::vector<mscclpp::RegisteredMemory> localMemories;
std::vector<mscclpp::NonblockingFuture<std::shared_ptr<mscclpp::Connection>>> connections(world_size);
std::vector<mscclpp::NonblockingFuture<mscclpp::RegisteredMemory>> remoteMemories;
std::vector<std::shared_future<std::shared_ptr<mscclpp::Connection>>> connections(world_size);
std::vector<std::shared_future<mscclpp::RegisteredMemory>> remoteMemories;
for (int r = 0; r < world_size; ++r) {
if (r == rank) continue;
mscclpp::Transport transport = mscclpp::Transport::CudaIpc;
// Connect with all other ranks
connections[r] = comm.connectOnSetup(r, 0, transport);
connections[r] = comm.connect(r, 0, transport);
auto memory = comm.registerMemory(data, dataSize, mscclpp::Transport::CudaIpc | ibTransport);
localMemories.push_back(memory);
comm.sendMemoryOnSetup(memory, r, 0);
remoteMemories.push_back(comm.recvMemoryOnSetup(r, 0));
comm.sendMemory(memory, r, 0);
remoteMemories.push_back(comm.recvMemory(r, 0));
}
comm.setup();
for (int r = 0; r < world_size; ++r) {
if (r == rank) continue;
semaphoreIds.push_back(proxyService.buildAndAddSemaphore(comm, connections[r].get()));
}
comm.setup();
std::vector<DeviceHandle<mscclpp::PortChannel>> portChannels;
for (size_t i = 0; i < semaphoreIds.size(); ++i) {
portChannels.push_back(mscclpp::deviceHandle(mscclpp::PortChannel(