mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-21 21:39:15 +00:00
CGEMM examples bf16, fp32, int8 (#332)
* Add int8 specialization for elementwise Add and Subtract.
* CGEMM examples bf16, fp32, int8
* Add convert reference output to CDataType.
* Skip BF16 data type during testing.
* Lower K value to get rid of accumulation error.
* Fix merge artifact.
* Fix changed function name: GetElementSpaceSize()
* Fix merge artifact.

Co-authored-by: Adam Osewski <aosewski@amd.com>
This commit is contained in:
@@ -6,8 +6,9 @@
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "ck/tensor_operation/gpu/device/device_base.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/device_base.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
@@ -91,7 +92,7 @@ struct ReferenceCGemm : public device::BaseOperator
|
||||
v_c_real += v_a_real * v_b_real - v_a_imag * v_b_imag;
|
||||
}
|
||||
|
||||
arg.c_m_n_real_(m, n) = v_c_real;
|
||||
arg.c_m_n_real_(m, n) = ck::type_convert<CDataType>(v_c_real);
|
||||
};
|
||||
|
||||
auto f_mk_kn_mn_imag = [&](auto m, auto n) {
|
||||
@@ -107,7 +108,7 @@ struct ReferenceCGemm : public device::BaseOperator
|
||||
v_c_imag += v_a_real * v_b_imag + v_a_imag * v_b_real;
|
||||
}
|
||||
|
||||
arg.c_m_n_imag_(m, n) = v_c_imag;
|
||||
arg.c_m_n_imag_(m, n) = ck::type_convert<CDataType>(v_c_imag);
|
||||
};
|
||||
|
||||
make_ParallelTensorFunctor(f_mk_kn_mn_real,
|
||||
|
||||
Reference in New Issue
Block a user