Files
mscclpp/python/test/proxy_test.cu
Saeed Maleki 85e8017535 Atomic for semaphores instead of fences (#188)
Co-authored-by: Pratyush Patel <pratyushpatel.1995@gmail.com>
Co-authored-by: Esha Choukse <eschouks@microsoft.com>
Co-authored-by: Changho Hwang <changhohwang@microsoft.com>
2023-10-13 18:57:08 +08:00

23 lines
736 B
Plaintext

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
#include <mscclpp/fifo_device.hpp>
#include <mscclpp/semaphore_device.hpp>
// Test kernel: one thread pushes a single trigger into the proxy FIFO and waits for it,
// then a subset of threads waits on the per-rank semaphores.
//
// Parameters:
//   my_rank    - index that is skipped when waiting on semaphores.
//   nranks     - upper bound (exclusive) on the thread indices that wait on a semaphore.
//   fifo       - device-side FIFO handle; a trigger is pushed to it and then synced on.
//   semaphores - array indexed by thread index; entry `i` is waited on by thread `i`.
//
// Launch notes: __launch_bounds__(1024, 1) caps the block at 1024 threads. Only
// threadIdx.x is used for indexing, so every launched block would repeat the same
// sequence.
// NOTE(review): presumably launched as a single block with blockDim.x >= nranks so that
// every peer semaphore is waited on -- confirm against the host-side launcher.
extern "C" __global__ void __launch_bounds__(1024, 1) proxy(int my_rank, int nranks, mscclpp::FifoDeviceHandle fifo,
                                                             mscclpp::Host2DeviceSemaphoreDeviceHandle* semaphores) {
  const int threadInBlock = threadIdx.x;
  const bool isLeader = (threadInBlock == 0);

  if (isLeader) {
    // Build the trigger with fixed test values (fst = 123, snd = 0).
    mscclpp::ProxyTrigger request;
    request.fst = 123;
    request.snd = 0;

    // Enqueue the trigger, then wait for the work to be done on the CPU side.
    const uint64_t pushedHead = fifo.push(request);
    fifo.sync(pushedHead);
  }

  // Block-wide barrier: no thread proceeds to the semaphore wait until thread 0 has
  // returned from fifo.sync().
  __syncthreads();

  // Threads outside [0, nranks) and the thread whose index equals my_rank have nothing
  // left to do.
  if (threadInBlock >= nranks || threadInBlock == my_rank) {
    return;
  }

  // Each remaining thread waits on the semaphore at its own index.
  semaphores[threadInBlock].wait();
}