Merge commit '93420ecf89d0747c35b096aa95453eaaceb0aea3' into develop

This commit is contained in:
assistant-librarian[bot]
2025-07-09 02:39:29 +00:00
parent 4a9f6e392f
commit 9455ad0507
2 changed files with 22 additions and 33 deletions

View File

@@ -32,33 +32,6 @@ __device__ float atomic_add<float>(float* p_dst, const float& x)
return atomicAdd(p_dst, x);
}
// Atomically adds `x` to the 16-bit unsigned integer at `p_dst` using a
// compare-and-swap retry loop.
//
// p_dst : address of the value to update.
// x     : amount to add.
// Returns the value that was stored at `p_dst` immediately before this
// thread's addition took effect.
//
// NOTE(review): relies on an atomicCAS overload taking `unsigned short*`
// being available for the target toolchain/architecture — confirm.
template <>
__device__ unsigned short atomic_add<unsigned short>(unsigned short* p_dst, const unsigned short& x)
{
    for(;;)
    {
        // Snapshot the current contents; this is the value the swap expects
        // to still find in memory.
        const unsigned short expected = *p_dst;

        // The sum is evaluated after integer promotion and narrowed back to
        // unsigned short.
        const unsigned short desired = static_cast<unsigned short>(expected + x);

        // The swap only succeeds if nobody modified *p_dst since the
        // snapshot; otherwise start over with a fresh snapshot.
        if(atomicCAS(p_dst, expected, desired) == expected)
        {
            return expected;
        }
    }
}
// Atomically adds `x` to the half-precision value at `p_dst` using a
// compare-and-swap retry loop on the value's 16-bit representation.
//
// p_dst : address of the value to update.
// x     : amount to add.
// Returns the value that was stored at `p_dst` immediately before this
// thread's addition took effect.
//
// NOTE(review): relies on an atomicCAS overload taking `unsigned short*`
// being available for the target toolchain/architecture — confirm.
template <>
__device__ _Float16 atomic_add<_Float16>(_Float16* p_dst, const _Float16& x)
{
    for(;;)
    {
        // Snapshot the current contents and compute the sum in _Float16
        // arithmetic.
        _Float16 expected = *p_dst;
        _Float16 desired  = expected + x;

        // atomicCAS is invoked on the raw 16-bit patterns, so both values are
        // reinterpreted as unsigned short.
        const unsigned short expected_bits = *reinterpret_cast<unsigned short*>(&expected);
        const unsigned short desired_bits  = *reinterpret_cast<unsigned short*>(&desired);

        // Success is judged by comparing bit patterns, not floating-point
        // values, so the test is an exact integer equality (a NaN snapshot
        // still compares equal to itself here).
        if(atomicCAS(reinterpret_cast<unsigned short*>(p_dst), expected_bits, desired_bits) ==
           expected_bits)
        {
            return expected;
        }
    }
}
template <>
__device__ double atomic_add<double>(double* p_dst, const double& x)
{