mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-05-05 14:11:32 +00:00
* In cases when the same `tag` is used for receiving data from the same remote rank, #514 changed the behavior of `Communicator::connect` and `Communicator::recvMemory` to receive data in the order in which `std::shared_future::get()` is called, instead of the original behavior of receiving data in the order of the method calls. Since the original behavior is more intuitive, we restore it. Now when `get()` is called on a future, the async function will first call `wait()` on the latest previously returned future. Recursively, this calls `wait()` on all previous futures that are not yet ready. * Removed all deprecated API calls and replaced them with the new ones.
This commit is contained in:
@@ -32,29 +32,25 @@ void setupMeshTopology(int rank, int worldsize, void* data, size_t dataSize) {
|
||||
|
||||
std::vector<mscclpp::SemaphoreId> semaphoreIds;
|
||||
std::vector<mscclpp::RegisteredMemory> localMemories;
|
||||
std::vector<mscclpp::NonblockingFuture<std::shared_ptr<mscclpp::Connection>>> connections(world_size);
|
||||
std::vector<mscclpp::NonblockingFuture<mscclpp::RegisteredMemory>> remoteMemories;
|
||||
std::vector<std::shared_future<std::shared_ptr<mscclpp::Connection>>> connections(world_size);
|
||||
std::vector<std::shared_future<mscclpp::RegisteredMemory>> remoteMemories;
|
||||
|
||||
for (int r = 0; r < world_size; ++r) {
|
||||
if (r == rank) continue;
|
||||
mscclpp::Transport transport = mscclpp::Transport::CudaIpc;
|
||||
// Connect with all other ranks
|
||||
connections[r] = comm.connectOnSetup(r, 0, transport);
|
||||
connections[r] = comm.connect(r, 0, transport);
|
||||
auto memory = comm.registerMemory(data, dataSize, mscclpp::Transport::CudaIpc | ibTransport);
|
||||
localMemories.push_back(memory);
|
||||
comm.sendMemoryOnSetup(memory, r, 0);
|
||||
remoteMemories.push_back(comm.recvMemoryOnSetup(r, 0));
|
||||
comm.sendMemory(memory, r, 0);
|
||||
remoteMemories.push_back(comm.recvMemory(r, 0));
|
||||
}
|
||||
|
||||
comm.setup();
|
||||
|
||||
for (int r = 0; r < world_size; ++r) {
|
||||
if (r == rank) continue;
|
||||
semaphoreIds.push_back(proxyService.buildAndAddSemaphore(comm, connections[r].get()));
|
||||
}
|
||||
|
||||
comm.setup();
|
||||
|
||||
std::vector<DeviceHandle<mscclpp::PortChannel>> portChannels;
|
||||
for (size_t i = 0; i < semaphoreIds.size(); ++i) {
|
||||
portChannels.push_back(mscclpp::deviceHandle(mscclpp::PortChannel(
|
||||
|
||||
Reference in New Issue
Block a user