add an example of customized type convert - bfp16_rtn (#869)

* add an example of customized bfp16_rtn

* fixed threadwise_copy

---------

Co-authored-by: Jing Zhang <jizha@amd.com>
This commit is contained in:
zjing14
2023-08-29 12:31:24 -05:00
committed by GitHub
parent 31ea132aa2
commit 38ada109ea
5 changed files with 52 additions and 4 deletions

View File

@@ -39,6 +39,12 @@ struct PassThrough
y = x;
}
template <>
__host__ __device__ void operator()<half_t, float>(half_t& y, const float& x) const
{
y = type_convert<half_t>(x);
}
template <>
__host__ __device__ void operator()<bhalf_t, bhalf_t>(bhalf_t& y, const bhalf_t& x) const
{

View File

@@ -104,13 +104,13 @@ struct ThreadwiseTensorSliceTransfer_v6r1
// apply pointwise operation
static_for<0, ScalarPerVector, 1>{}([&](auto i) {
SrcData v;
DstData v;
// apply element-wise operation
element_op_(v, src_vector_container.template AsType<SrcData>()[i]);
// apply type convert
dst_vector_container.template AsType<DstData>()(i) = type_convert<DstData>(v);
dst_vector_container.template AsType<DstData>()(i) = v;
});
const bool is_dst_valid =