mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 05:01:25 +00:00
Add example of conv_fwd_bias_relu_add for int4, int8, bfp16, fp16, and fp32 (#343)
* [LWPCK-359] Initial commit
* Working version for fp16, add results to readme
* Update according to PR #341
* Update results in readme
* Add fp32 example
* Add bf16 example
* Update fp16 and fp32 examples
* Add int8 example
* Add separate lengths and strides tensors for D tensors

Co-authored-by: Rosty Geyyer <rosty.geyyer@amd.com>
This commit is contained in:
@@ -78,6 +78,26 @@ struct AddReluAdd
|
||||
float c = b + x2;
|
||||
y = c;
|
||||
}
|
||||
|
||||
template <>
|
||||
__host__ __device__ constexpr void operator()<bhalf_t, float, bhalf_t, bhalf_t>(
|
||||
bhalf_t& y, const float& x0, const bhalf_t& x1, const bhalf_t& x2) const
|
||||
{
|
||||
float a = x0 + x1;
|
||||
float b = a > 0 ? a : 0;
|
||||
float c = b + x2;
|
||||
y = c;
|
||||
}
|
||||
|
||||
template <>
|
||||
__host__ __device__ constexpr void operator()<int8_t, int8_t, int8_t, int8_t>(
|
||||
int8_t& y, const int8_t& x0, const int8_t& x1, const int8_t& x2) const
|
||||
{
|
||||
int32_t a = x0 + x1;
|
||||
int32_t b = a > 0 ? a : 0;
|
||||
int32_t c = b + x2;
|
||||
y = c;
|
||||
}
|
||||
};
|
||||
|
||||
struct AddHardswishAdd
|
||||
|
||||
Reference in New Issue
Block a user