mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 14:59:17 +00:00
Improve normalization (#580)
* Sync the order of type string with template parameter
* Add more instances
* Check the vector size and remove redundant var
* Extract var to static, prepare to separate sweep once kernel
* Separate sweeponce flow and optimize the flow
* 1. Rename AccDatatype in normalization to computeData
  2. Rename AccElementwiseOperation to YElementwiseOperation in normalization
* Remove useless code
* Update naive variance kernel
* Refine string
* Fix typo
* Support naive variance for device_normalization
* Check the blocksize
* Share the VGPR of x and y
* Share the VGPR of gamma and beta
* Add more instances
* Support fp16 sqrt for experiment
* Add CHANGELOG
* Fix typo
* clang-format
This commit is contained in:
@@ -83,6 +83,11 @@ static inline __host__ bool isnan(int4_t x)
|
||||
};
|
||||
#endif
|
||||
|
||||
// Host-side square root for half_t.
// The argument is converted to float, passed through std::sqrt, and the
// float result is converted back to half_t.
static inline __host__ half_t sqrt(half_t x)
{
    const float widened = static_cast<float>(x);
    const float root    = std::sqrt(widened);
    return static_cast<half_t>(root);
}
|
||||
|
||||
// Host-side square root for float; forwards directly to std::sqrt.
static inline __host__ float sqrt(float x)
{
    const float root = std::sqrt(x);
    return root;
}
|
||||
|
||||
// Host-side square root for double; forwards directly to std::sqrt.
static inline __host__ double sqrt(double x)
{
    const double root = std::sqrt(x);
    return root;
}
|
||||
@@ -158,6 +163,11 @@ static inline __device__ bool isnan(half_t x)
|
||||
return (xx & 0x7FFF) > 0x7C00;
|
||||
};
|
||||
|
||||
// Device-side square root for half_t.
// The argument is converted to float, passed through the
// __builtin_amdgcn_sqrtf compiler builtin, and the float result is converted
// back to half_t.
// NOTE(review): the builtin presumably maps to the hardware sqrt instruction,
// whose rounding/denormal behaviour may differ from std::sqrt - confirm.
static inline __device__ half_t sqrt(half_t x)
{
    const float widened = static_cast<float>(x);
    const float root    = __builtin_amdgcn_sqrtf(widened);
    return static_cast<half_t>(root);
}
|
||||
|
||||
// Device-side square root for float; forwards to the __builtin_amdgcn_sqrtf
// compiler builtin.
// NOTE(review): the builtin presumably maps to the hardware sqrt instruction,
// whose rounding/denormal behaviour may differ from std::sqrt - confirm.
static inline __device__ float sqrt(float x)
{
    const float root = __builtin_amdgcn_sqrtf(x);
    return root;
}
|
||||
|
||||
// Device-side square root for double; forwards to the __builtin_amdgcn_sqrt
// compiler builtin.
// NOTE(review): the builtin presumably maps to the hardware f64 sqrt
// instruction, which may be less accurate than a correctly rounded sqrt -
// confirm this is acceptable for callers.
static inline __device__ double sqrt(double x)
{
    const double root = __builtin_amdgcn_sqrt(x);
    return root;
}
|
||||
|
||||
Reference in New Issue
Block a user