diff --git a/src/ext/collectives/allreduce/allreduce_nvls_packet.cu b/src/ext/collectives/allreduce/allreduce_nvls_packet.cu index 2ef0516e..56455b6e 100644 --- a/src/ext/collectives/allreduce/allreduce_nvls_packet.cu +++ b/src/ext/collectives/allreduce/allreduce_nvls_packet.cu @@ -83,6 +83,7 @@ void AllreduceNvlsPacket::initialize(std::shared_ptr comm) { this->nvlsConnections_ = setupNvlsConnections(comm, nvlsBufferSize_, nSwitchChannels); this->switchChannels_ = setupNvlsChannels(this->nvlsConnections_, this->scratchBuffer_, this->scratchBufferSize_, nSwitchChannels); + comm->bootstrap()->barrier(); } AlgorithmCtxKey AllreduceNvlsPacket::generateAllreduceContextKey(const void*, void*, size_t, DataType, bool) {