[mscclpp-test] Add AllReduce and AllToAll tests (#83)

This commit is contained in:
Changho Hwang
2023-06-07 18:58:47 +08:00
committed by GitHub
parent d9568a3235
commit 0c14a67ad2
11 changed files with 555 additions and 289 deletions

View File

@@ -228,8 +228,12 @@ struct SimpleDeviceChannel {
SimpleDeviceChannel(DeviceChannel devChan, MemoryId dst, MemoryId src) : devChan_(devChan), dst_(dst), src_(src) {}
SimpleDeviceChannel(DeviceChannel devChan, void* dstPtr, void* srcPtr)
: devChan_(devChan), srcPtr_(srcPtr), dstPtr_(dstPtr) {}
SimpleDeviceChannel(DeviceChannel devChan, void* dstPtr, void* srcPtr, void* tmpPtr = nullptr)
: devChan_(devChan), dstPtr_(dstPtr), srcPtr_(srcPtr), tmpPtr_(tmpPtr) {}
SimpleDeviceChannel(DeviceChannel devChan, MemoryId dst, MemoryId src, void* dstPtr, void* srcPtr,
void* tmpPtr = nullptr)
: devChan_(devChan), dst_(dst), src_(src), dstPtr_(dstPtr), srcPtr_(srcPtr), tmpPtr_(tmpPtr) {}
SimpleDeviceChannel(const SimpleDeviceChannel& other) = default;
@@ -278,8 +282,11 @@ struct SimpleDeviceChannel {
MemoryId src_;
// these are used for direct copy
void* srcPtr_;
void* dstPtr_;
void* srcPtr_;
// extra local buffer for out-of-place copy
void* tmpPtr_;
};
} // namespace channel

View File

@@ -12,6 +12,9 @@ struct DeviceSyncer {
// previous work of all threads in cooperating blocks is finished.
__forceinline__ __device__ void sync(int blockNum) {
int maxOldCnt = blockNum - 1;
__threadfence();
// Make sure that all threads in this block have done `__threadfence()`
// before flipping `flag`.
__syncthreads();
if (threadIdx.x == 0) {
int tmpIsAdd = isAdd_ ^ 1;