Add an example of a customized type conversion - bfp16_rtn (#869)

* add an example of a customized bfp16_rtn type conversion

* fixed threadwise_copy

---------

Co-authored-by: Jing Zhang <jizha@amd.com>
This commit is contained in:
zjing14
2023-08-29 12:31:24 -05:00
committed by GitHub
parent 31ea132aa2
commit 38ada109ea
5 changed files with 52 additions and 4 deletions

View File

@@ -104,13 +104,13 @@ struct ThreadwiseTensorSliceTransfer_v6r1
// apply pointwise operation
static_for<0, ScalarPerVector, 1>{}([&](auto i) {
SrcData v;
DstData v;
// apply element-wise operation
element_op_(v, src_vector_container.template AsType<SrcData>()[i]);
// apply type convert
dst_vector_container.template AsType<DstData>()(i) = type_convert<DstData>(v);
dst_vector_container.template AsType<DstData>()(i) = v;
});
const bool is_dst_valid =