mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
Add an example of a customized type conversion - bfp16_rtn (#869)
* add an example of customized bfp16_rtn
* fixed threadwise_copy
---------
Co-authored-by: Jing Zhang <jizha@amd.com>
This commit is contained in:
@@ -104,13 +104,13 @@ struct ThreadwiseTensorSliceTransfer_v6r1
|
||||
|
||||
// apply pointwise operation
|
||||
static_for<0, ScalarPerVector, 1>{}([&](auto i) {
|
||||
SrcData v;
|
||||
DstData v;
|
||||
|
||||
// apply element-wise operation
|
||||
element_op_(v, src_vector_container.template AsType<SrcData>()[i]);
|
||||
|
||||
// apply type convert
|
||||
dst_vector_container.template AsType<DstData>()(i) = type_convert<DstData>(v);
|
||||
dst_vector_container.template AsType<DstData>()(i) = v;
|
||||
});
|
||||
|
||||
const bool is_dst_valid =
|
||||
|
||||
Reference in New Issue
Block a user