mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 05:01:25 +00:00
Add multiple d gridwise gemm on Navi21 for ResNet50 (#517)
* start adding example * add multiple d fp16 example * device: transfer elementwise op to gridwise * gridwise: add multiple d * change example for multiple d * fix register spills * fix for passthrough element op * fix int8 overflow * change example file name * add instance for dl multiple d * example: add DsDataType * remove grouped_convolution_forward_dl.hpp * add header file (was deleted before) * fix unsupported-device issue * format * remove passthrough check Co-authored-by: letaoqin <letaoqin@amd.com>
This commit is contained in:
@@ -187,6 +187,22 @@ struct AddRelu
|
||||
const float a = x0 + type_convert<float>(x1);
|
||||
y = a > 0.0f ? a : 0.0f;
|
||||
};
|
||||
|
||||
template <>
|
||||
__host__ __device__ constexpr void
|
||||
operator()<int, int, int8_t>(int& y, const int& x0, const int8_t& x1) const
|
||||
{
|
||||
const int8_t a = x0 + x1;
|
||||
y = a > 0 ? a : 0;
|
||||
};
|
||||
|
||||
template <>
|
||||
__host__ __device__ constexpr void
|
||||
operator()<int8_t, int8_t, int8_t>(int8_t& y, const int8_t& x0, const int8_t& x1) const
|
||||
{
|
||||
const int8_t a = x0 + x1;
|
||||
y = a > 0 ? a : 0;
|
||||
};
|
||||
};
|
||||
|
||||
struct AddHardswish
|
||||
|
||||
Reference in New Issue
Block a user