add an example of customized type convert - bfp16_rtn (#869)

* add an example of customized bfp16_rtn

* fixed threadwise_copy

---------

Co-authored-by: Jing Zhang <jizha@amd.com>
This commit is contained in:
zjing14
2023-08-29 12:31:24 -05:00
committed by GitHub
parent 31ea132aa2
commit 38ada109ea
5 changed files with 52 additions and 4 deletions

View File

@@ -39,6 +39,12 @@ struct PassThrough
y = x;
}
template <>
__host__ __device__ void operator()<half_t, float>(half_t& y, const float& x) const
{
y = type_convert<half_t>(x);
}
template <>
__host__ __device__ void operator()<bhalf_t, bhalf_t>(bhalf_t& y, const bhalf_t& x) const
{