Add example of conv_fwd_bias_relu_add for int4, int8, bfp16, fp16, and fp32 (#343)

* [LWPCK-359] Initial commit

* Working version for fp16, add results to readme

* Update according to PR #341

* Update results in readme

* Add fp32 example

* Add bf16 example

* Update fp16 and fp32 examples

* Add int8 example

* Add separate lengths and strides tensors for D tensors

Co-authored-by: Rosty Geyyer <rosty.geyyer@amd.com>
Author: Rostyslav Geyyer
Date: 2022-08-12 15:30:27 -05:00
Committed by: GitHub
Parent: 35e49f2de6
Commit: 0c6ef7c14e
9 changed files with 2048 additions and 0 deletions

@@ -78,6 +78,26 @@ struct AddReluAdd
        float c = b + x2;
        y = c;
    }

    template <>
    __host__ __device__ constexpr void operator()<bhalf_t, float, bhalf_t, bhalf_t>(
        bhalf_t& y, const float& x0, const bhalf_t& x1, const bhalf_t& x2) const
    {
        float a = x0 + x1;
        float b = a > 0 ? a : 0;
        float c = b + x2;
        y = c;
    }

    template <>
    __host__ __device__ constexpr void operator()<int8_t, int8_t, int8_t, int8_t>(
        int8_t& y, const int8_t& x0, const int8_t& x1, const int8_t& x2) const
    {
        int32_t a = x0 + x1;
        int32_t b = a > 0 ? a : 0;
        int32_t c = b + x2;
        y = c;
    }
};

struct AddHardswishAdd
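
For context, each specialization added here computes the same fused epilogue used by the new conv_fwd_bias_relu_add examples: y = ReLU(x0 + x1) + x2, where x0 is the convolution output, x1 the bias, and x2 the residual tensor, with the intermediate math carried in a wider accumulation type. The sketch below is a standalone host-side illustration of that semantics, not code from this commit or from the CK library; the helper name add_relu_add_ref, the Out/Acc template parameters, and the sample values in main are made up for the example.

    #include <cstdint>
    #include <iostream>

    // Reference semantics of the AddReluAdd epilogue:
    //   y = ReLU(x0 + x1) + x2, accumulated in Acc, stored as Out.
    template <typename Out, typename Acc>
    Out add_relu_add_ref(Acc x0, Out x1, Out x2)
    {
        Acc a = x0 + static_cast<Acc>(x1); // add bias
        Acc b = a > Acc{0} ? a : Acc{0};   // ReLU
        Acc c = b + static_cast<Acc>(x2);  // residual add
        return static_cast<Out>(c);        // convert to the output type
    }

    int main()
    {
        // fp32 output with fp32 accumulation: ReLU(-1.5 + 0.5) + 2.0 == 2.0
        std::cout << add_relu_add_ref<float, float>(-1.5f, 0.5f, 2.0f) << "\n";

        // int8 output with int32 accumulation: ReLU(40 + 3) + 5 == 48
        // (this sketch does not model int8 saturation)
        std::cout << int(add_relu_add_ref<int8_t, int32_t>(40, 3, 5)) << "\n";
    }

The wider accumulation type mirrors what the diff does for the bf16 and int8 specializations, which compute in float and int32_t respectively before writing the narrower output.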