mirror of
https://github.com/microsoft/mscclpp.git
synced 2026-05-11 00:40:12 +00:00
Separate NPKit CPU timestamp access per thread block on the AMD platform (#321)
Reference: https://github.com/ROCm/rccl/pull/1229
This commit is contained in:
@@ -262,6 +262,14 @@ struct Executor::Impl {
|
||||
static uint32_t flag = 0;
|
||||
int nthreadblocks = context.deviceExecutionPlans.size();
|
||||
#if defined(ENABLE_NPKIT)
|
||||
#if defined(__HIP_PLATFORM_AMD__)
|
||||
if (nthreadblocks > NPKIT_MAX_NUM_GPU_THREADBLOCKS) {
|
||||
throw Error("Executor plan launching " + std::to_string(nthreadblocks) +
|
||||
" thread blocks, exceeding NPKit support (" + std::to_string(NPKIT_MAX_NUM_GPU_THREADBLOCKS) +
|
||||
")",
|
||||
ErrorCode::ExecutorError);
|
||||
}
|
||||
#endif
|
||||
size_t sharedMemSize = sizeof(DeviceExecutionPlan) + NPKIT_SHM_NUM_EVENTS * sizeof(NpKitEvent);
|
||||
#else
|
||||
size_t sharedMemSize = sizeof(DeviceExecutionPlan);
|
||||
|
||||
Reference in New Issue
Block a user