mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-20 21:09:08 +00:00
Add FP64 XDL GEMM built-in function (#199)
* add intrin_mfma_f64_16x16x4f64
* add example
* gemm reference add double data type
* chang init data
* fix M N PerXdlops
* fix ifdef
* add comparsion config
* add conv fwd example
* format log out
* change rc matrix egister layout
* reorganize example
* reorganize example 2
* format,because merge develop
* fix call impl adding acc data type
* lost ;
* add compiler warning
* change example tunning parameters
* add test for fp64
* add instance
* add test/gemm/gemm_fp64.cpp
* fix get name issue
* remove some tunning parameter
* fix conflict
* format
* use integer value for GEMM test
* add acc data type
* remove typeid because fp16
* fix streamconfig etc bug from merging develop
* format
* remove test_gemm_xdl_fp64
* add AccDataType
* AccDataType problem
Co-authored-by: qinletao <letaoqin@amd.com>
Co-authored-by: Chao Liu <chao.liu2@amd.com>
[ROCm/composable_kernel commit: 3e6c2610ae]
This commit is contained in:
@@ -98,6 +98,7 @@ namespace profiler {
|
||||
template <typename ADataType,
|
||||
typename BDataType,
|
||||
typename CDataType,
|
||||
typename AccDataType,
|
||||
typename ALayout,
|
||||
typename BLayout,
|
||||
typename CLayout>
|
||||
@@ -511,8 +512,14 @@ void profile_gemm_impl(int do_verification,
|
||||
bf16_to_f32_(b_k_n, b_f32_k_n);
|
||||
bf16_to_f32_(c_m_n_device_result, c_m_n_device_f32_result);
|
||||
|
||||
using ReferenceGemmInstance = ck::tensor_operation::host::
|
||||
ReferenceGemm<float, float, float, AElementOp, BElementOp, CElementOp>;
|
||||
using ReferenceGemmInstance =
|
||||
ck::tensor_operation::host::ReferenceGemm<float,
|
||||
float,
|
||||
float,
|
||||
float,
|
||||
AElementOp,
|
||||
BElementOp,
|
||||
CElementOp>;
|
||||
|
||||
auto ref_gemm = ReferenceGemmInstance{};
|
||||
auto ref_invoker = ref_gemm.MakeInvoker();
|
||||
@@ -544,6 +551,7 @@ void profile_gemm_impl(int do_verification,
|
||||
ck::tensor_operation::host::ReferenceGemm<ADataType,
|
||||
BDataType,
|
||||
CDataType,
|
||||
AccDataType,
|
||||
AElementOp,
|
||||
BElementOp,
|
||||
CElementOp>;
|
||||
|
||||
@@ -144,8 +144,13 @@ bool profile_gemm_reduce_impl(int do_verification,
|
||||
|
||||
if(do_verification)
|
||||
{
|
||||
using ReferenceGemmInstance = ck::tensor_operation::host::
|
||||
ReferenceGemm<ADataType, BDataType, CDataType, AElementOp, BElementOp, CElementOp>;
|
||||
using ReferenceGemmInstance = ck::tensor_operation::host::ReferenceGemm<ADataType,
|
||||
BDataType,
|
||||
CDataType,
|
||||
DDataType,
|
||||
AElementOp,
|
||||
BElementOp,
|
||||
CElementOp>;
|
||||
|
||||
auto ref_gemm = ReferenceGemmInstance{};
|
||||
auto ref_invoker = ref_gemm.MakeInvoker();
|
||||
|
||||
@@ -43,6 +43,7 @@ namespace profiler {
|
||||
template <typename ADataType,
|
||||
typename BDataType,
|
||||
typename CDataType,
|
||||
typename AccDataType,
|
||||
typename ALayout,
|
||||
typename BLayout,
|
||||
typename CLayout>
|
||||
@@ -271,6 +272,7 @@ void profile_grouped_gemm_impl(int do_verification,
|
||||
ck::tensor_operation::host::ReferenceGemm<ADataType,
|
||||
BDataType,
|
||||
CDataType,
|
||||
AccDataType,
|
||||
AElementOp,
|
||||
BElementOp,
|
||||
CElementOp>;
|
||||
|
||||
Reference in New Issue
Block a user