diff --git a/include/mscclpp/switch_channel_device.hpp b/include/mscclpp/switch_channel_device.hpp index 7b749f7a..841b7f32 100644 --- a/include/mscclpp/switch_channel_device.hpp +++ b/include/mscclpp/switch_channel_device.hpp @@ -84,7 +84,9 @@ struct SwitchChannelDeviceHandle { : "=r"(val.words[0]), "=r"(val.words[1]), "=r"(val.words[2]), "=r"(val.words[3]) : "l"(ptr) : "memory"); - } else if constexpr (std::is_same_v) { + } +#if (defined(__CUDA_ARCH_SPECIFIC__) || defined(__CUDA_ARCH_FAMILY_SPECIFIC__)) && (__CUDA_ARCH__ >= 1000) + else if constexpr (std::is_same_v) { if constexpr (std::is_same_v) { asm("multimem.ld_reduce.relaxed.sys.global.add.acc::f16.e4m3x4 %0, [%1];" : "=r"(val.words[0]) @@ -150,7 +152,9 @@ struct SwitchChannelDeviceHandle { : "l"(ptr) : "memory"); } - } else { + } +#endif + else { static_assert(dependentFalse, "Not supported type"); } return val;