mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-05-11 17:00:22 +00:00
Fix for multi-node test (#614)
Fix multi-node test

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -414,8 +414,8 @@ void BaseTestEngine::setupMeshConnections(std::vector<DeviceHandle<mscclpp::Port
|
||||
mscclpp::RegisteredMemory& localRegMemory = (outputBuff) ? outputBufRegMem : inputBufRegMem;
|
||||
|
||||
// store memory to keep resource alive
|
||||
inputMemory_ = inputBufRegMem;
|
||||
outputMemory_ = outputBufRegMem;
|
||||
inputMemories_.push_back(inputBufRegMem);
|
||||
outputMemories_.push_back(outputBufRegMem);
|
||||
setupMeshConnectionsInternal(connections, localRegMemory, remoteRegMemories);
|
||||
|
||||
if (setupChannel != nullptr) {
|
||||
@@ -446,8 +446,8 @@ void BaseTestEngine::setupMeshConnections(std::vector<mscclpp::MemoryChannel>& m
|
||||
mscclpp::RegisteredMemory& localRegMemory =
|
||||
(outputBuff && semantic == ChannelSemantic::PUT) ? outputBufRegMem : inputBufRegMem;
|
||||
// store memory to keep resource alive
|
||||
inputMemory_ = inputBufRegMem;
|
||||
outputMemory_ = outputBufRegMem;
|
||||
inputMemories_.push_back(inputBufRegMem);
|
||||
outputMemories_.push_back(outputBufRegMem);
|
||||
setupMeshConnectionsInternal(connections, localRegMemory, remoteRegMemories);
|
||||
|
||||
std::unordered_map<size_t, std::vector<std::shared_ptr<mscclpp::MemoryDevice2DeviceSemaphore>>> memorySemaphores;
|
||||
@@ -498,8 +498,8 @@ void BaseTestEngine::setupMeshConnections(std::vector<mscclpp::MemoryChannel>& m
|
||||
(getPacketBuff) ? getPacketBufRegMem : ((outputBuff) ? outputBufRegMem : inputBufRegMem);
|
||||
// store memory to keep resource alive
|
||||
scratchMemory_ = getPacketBufRegMem;
|
||||
inputMemory_ = inputBufRegMem;
|
||||
outputMemory_ = outputBufRegMem;
|
||||
inputMemories_.push_back(inputBufRegMem);
|
||||
outputMemories_.push_back(outputBufRegMem);
|
||||
|
||||
setupMeshConnectionsInternal(connections, localRegMemory, remoteRegMemories);
|
||||
|
||||
|
||||
@@ -132,8 +132,8 @@ class BaseTestEngine {
|
||||
std::shared_ptr<mscclpp::Communicator> comm_;
|
||||
std::shared_ptr<mscclpp::BaseProxyService> chanService_;
|
||||
mscclpp::RegisteredMemory scratchMemory_;
|
||||
mscclpp::RegisteredMemory inputMemory_;
|
||||
mscclpp::RegisteredMemory outputMemory_;
|
||||
std::vector<mscclpp::RegisteredMemory> inputMemories_;
|
||||
std::vector<mscclpp::RegisteredMemory> outputMemories_;
|
||||
cudaStream_t stream_;
|
||||
int error_;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user