mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-19 20:40:07 +00:00
Add an example of a customized type conversion - bfp16_rtn (#869)
* Add an example of a customized bfp16_rtn conversion
* Fix threadwise_copy
---------
Co-authored-by: Jing Zhang <jizha@amd.com>
[ROCm/composable_kernel commit: 38ada109ea]
This commit is contained in:
@@ -92,11 +92,11 @@ struct ReferenceGemm : public device::BaseOperator
 ck::type_convert<AccDataType>(v_a) * ck::type_convert<AccDataType>(v_b);
 }
 
-AccDataType v_c;
+CDataType v_c;
 
 arg.c_element_op_(v_c, v_acc);
 
-arg.c_m_n_(m, n) = ck::type_convert<CDataType>(v_c);
+arg.c_m_n_(m, n) = v_c;
 };
 
 make_ParallelTensorFunctor(
Reference in New Issue
Block a user