Add support for multicast reduce instruction (#316)

This commit is contained in:
Roshan Dathathri
2024-06-19 13:28:12 -07:00
committed by GitHub
parent 1351f9f1c5
commit 93ed8e1e58
3 changed files with 21 additions and 5 deletions

View File

@@ -41,8 +41,8 @@ extern "C" __global__ void __launch_bounds__(1024, 1)
for (int idx = my_st + my_offset; idx < my_en; idx += my_step) {
uint4 val;
DeviceMulticastPointerDeviceHandle::multimemLoad(val, mc_ptr + idx);
DeviceMulticastPointerDeviceHandle::multimemStore(val, mc_ptr + idx);
mscclpp::DeviceMulticastPointerDeviceHandle::multimemLoadReduce(val, mc_ptr + idx);
mscclpp::DeviceMulticastPointerDeviceHandle::multimemStore(val, mc_ptr + idx);
}
deviceSyncer.sync(gridDim.x);