mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-21 21:39:15 +00:00
Add v3 support for Grouped fwd conv+bias+clamp & ckProfiler (#2463)
* Add logging to IsSupported.
* Less casting in AddClamp
* Conv+bias+clamp instances & profiler BF16
* Fix 3D instances & run just 1x for verification.
* Run just once for verification conv fwd.
* ckProfiler conv fwd clamp
* Remove exec bit & formatting
* Add support for MultiD for grouped conv fwd v3.
* Enable 2Lds.
* clean
* align instances
* align instances
* profiler fixes
* Fixes
* fix
* fix
---------
Co-authored-by: Adam Osewski <root@quanta-ccs-aus-f01-19.cs-aus.dcgpu>
Co-authored-by: Bartłomiej Kocot <barkocot@amd.com>
[ROCm/composable_kernel commit: c8eb2f995c]
This commit is contained in:
@@ -379,10 +379,10 @@ struct AddClamp
|
||||
__host__ __device__ constexpr void
|
||||
operator()<half_t, half_t, half_t>(half_t& y, const half_t& x0, const half_t& x1) const
|
||||
{
|
||||
const half_t a = x0 + x1;
|
||||
y = a > type_convert<half_t>(floor_)
|
||||
? (a < type_convert<half_t>(ceil_) ? a : type_convert<half_t>(ceil_))
|
||||
: type_convert<half_t>(floor_);
|
||||
const half_t floor = type_convert<half_t>(floor_);
|
||||
const half_t ceil = type_convert<half_t>(ceil_);
|
||||
const half_t a = x0 + x1;
|
||||
y = a > floor ? (a < ceil ? a : ceil) : floor;
|
||||
};
|
||||
|
||||
template <>
|
||||
|
||||
Reference in New Issue
Block a user