mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-18 20:09:25 +00:00
Add declarations for atomic add for fp16 and unsigned short. (#2483)
* add template for fp16 atomic add
* add template for unsigned short atomic add
* use atomicCAS in atomic add for fp16 and unsigned short
* revert back to atomic add using casting
[ROCm/composable_kernel commit: 1b66f3f4a3]
This commit is contained in:
@@ -32,6 +32,22 @@ __device__ float atomic_add<float>(float* p_dst, const float& x)
|
||||
return atomicAdd(p_dst, x);
|
||||
}
|
||||
|
||||
template <>
|
||||
__device__ unsigned short atomic_add<unsigned short>(unsigned short* p_dst, const unsigned short& x)
|
||||
{
|
||||
// Use atomicAdd with unsigned int
|
||||
return static_cast<unsigned short>(
|
||||
atomicAdd(reinterpret_cast<unsigned int*>(p_dst), static_cast<unsigned int>(x)));
|
||||
}
|
||||
|
||||
template <>
|
||||
__device__ _Float16 atomic_add<_Float16>(_Float16* p_dst, const _Float16& x)
|
||||
{
|
||||
// Use atomicAdd with unsigned int
|
||||
return static_cast<_Float16>(
|
||||
atomicAdd(reinterpret_cast<unsigned int*>(p_dst), static_cast<unsigned int>(x)));
|
||||
}
|
||||
|
||||
template <>
|
||||
__device__ double atomic_add<double>(double* p_dst, const double& x)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user