mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-18 03:49:41 +00:00
Allow building CK for specific data types and split off last remaining DL instances. (#830)
* properly split conv_nd_bwd_data instances
* split conv2d_fwd instance data types
* split the gemm, conv2d_fwd and batched_gemm_softamx_gemm
* split the tests by data types where possible
* filter examples by DTYPES
* split few remaining examples by DTYPES
* filter most instances by DTYPES
* add new lines at end of headers, fix grouped_gemm profiler
* fix syntax
* split the ckprofiler instances by DTYPES
* split the conv2d and quantization DL and XDL instances
* fix the splitting of conv2d DL instances
* split softmax and pool_fwd tests for fp16 and fp32 types
* fix syntax
* fix the dl_int8 quantization instances isolation
[ROCm/composable_kernel commit: 08eb176929]
This commit is contained in:
@@ -60,6 +60,6 @@ add_subdirectory(contraction)
|
||||
add_subdirectory(pool_fwd)
|
||||
add_subdirectory(batched_gemm_multi_d)
|
||||
add_subdirectory(grouped_convnd_bwd_data)
|
||||
if(GPU_TARGETS MATCHES "gfx1100")
|
||||
if(GPU_TARGETS MATCHES "gfx11")
|
||||
add_subdirectory(wmma_op)
|
||||
endif()
|
||||
|
||||
@@ -2,21 +2,26 @@ list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
|
||||
set(target 0)
|
||||
foreach(gpu IN LISTS GPU_TARGETS)
|
||||
if(gpu IN_LIST gpu_list AND target EQUAL 0)
|
||||
add_test_executable(test_batched_gemm_fp16 batched_gemm_fp16.cpp)
|
||||
target_link_libraries(test_batched_gemm_fp16 PRIVATE utility)
|
||||
target_link_libraries(test_batched_gemm_fp16 PRIVATE device_batched_gemm_instance)
|
||||
|
||||
add_test_executable(test_batched_gemm_fp32 batched_gemm_fp32.cpp)
|
||||
target_link_libraries(test_batched_gemm_fp32 PRIVATE utility)
|
||||
target_link_libraries(test_batched_gemm_fp32 PRIVATE device_batched_gemm_instance)
|
||||
|
||||
add_test_executable(test_batched_gemm_bf16 batched_gemm_bf16.cpp)
|
||||
target_link_libraries(test_batched_gemm_bf16 PRIVATE utility)
|
||||
target_link_libraries(test_batched_gemm_bf16 PRIVATE device_batched_gemm_instance)
|
||||
|
||||
add_test_executable(test_batched_gemm_int8 batched_gemm_int8.cpp)
|
||||
target_link_libraries(test_batched_gemm_int8 PRIVATE utility)
|
||||
target_link_libraries(test_batched_gemm_int8 PRIVATE device_batched_gemm_instance)
|
||||
if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
|
||||
add_test_executable(test_batched_gemm_fp16 batched_gemm_fp16.cpp)
|
||||
target_link_libraries(test_batched_gemm_fp16 PRIVATE utility)
|
||||
target_link_libraries(test_batched_gemm_fp16 PRIVATE device_batched_gemm_instance)
|
||||
endif()
|
||||
if(DTYPES MATCHES "fp32" OR NOT DEFINED DTYPES)
|
||||
add_test_executable(test_batched_gemm_fp32 batched_gemm_fp32.cpp)
|
||||
target_link_libraries(test_batched_gemm_fp32 PRIVATE utility)
|
||||
target_link_libraries(test_batched_gemm_fp32 PRIVATE device_batched_gemm_instance)
|
||||
endif()
|
||||
if(DTYPES MATCHES "bf16" OR NOT DEFINED DTYPES)
|
||||
add_test_executable(test_batched_gemm_bf16 batched_gemm_bf16.cpp)
|
||||
target_link_libraries(test_batched_gemm_bf16 PRIVATE utility)
|
||||
target_link_libraries(test_batched_gemm_bf16 PRIVATE device_batched_gemm_instance)
|
||||
endif()
|
||||
if(DTYPES MATCHES "int8" OR NOT DEFINED DTYPES)
|
||||
add_test_executable(test_batched_gemm_int8 batched_gemm_int8.cpp)
|
||||
target_link_libraries(test_batched_gemm_int8 PRIVATE utility)
|
||||
target_link_libraries(test_batched_gemm_int8 PRIVATE device_batched_gemm_instance)
|
||||
endif()
|
||||
set(target 1)
|
||||
endif()
|
||||
endforeach()
|
||||
@@ -2,10 +2,12 @@ list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
|
||||
set(target 0)
|
||||
foreach(gpu IN LISTS GPU_TARGETS)
|
||||
if(gpu IN_LIST gpu_list AND target EQUAL 0)
|
||||
add_custom_target(test_batched_gemm_gemm)
|
||||
add_gtest_executable(test_batched_gemm_gemm_fp16 test_batched_gemm_gemm_fp16.cpp)
|
||||
target_link_libraries(test_batched_gemm_gemm_fp16 PRIVATE utility device_batched_gemm_gemm_instance)
|
||||
add_dependencies(test_batched_gemm_gemm test_batched_gemm_gemm_fp16)
|
||||
set(target 1)
|
||||
if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
|
||||
add_custom_target(test_batched_gemm_gemm)
|
||||
add_gtest_executable(test_batched_gemm_gemm_fp16 test_batched_gemm_gemm_fp16.cpp)
|
||||
target_link_libraries(test_batched_gemm_gemm_fp16 PRIVATE utility device_batched_gemm_gemm_instance)
|
||||
add_dependencies(test_batched_gemm_gemm test_batched_gemm_gemm_fp16)
|
||||
set(target 1)
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
@@ -1,5 +1,4 @@
|
||||
# TODO: Enable for gfx90a after complier fix
|
||||
if(DL_KERNELS)
|
||||
add_gtest_executable(test_batched_gemm_multi_d test_batched_gemm_multi_d.cpp)
|
||||
target_link_libraries(test_batched_gemm_multi_d PRIVATE utility device_batched_gemm_multi_d_instance)
|
||||
add_gtest_executable(test_batched_gemm_multi_d test_batched_gemm_multi_d.cpp)
|
||||
target_link_libraries(test_batched_gemm_multi_d PRIVATE utility device_batched_gemm_multi_d_instance)
|
||||
endif()
|
||||
|
||||
@@ -2,9 +2,11 @@ list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
|
||||
set(target 0)
|
||||
foreach(gpu IN LISTS GPU_TARGETS)
|
||||
if(gpu IN_LIST gpu_list AND target EQUAL 0)
|
||||
add_test_executable(test_batched_gemm_reduce_fp16 batched_gemm_reduce_fp16.cpp)
|
||||
target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE utility)
|
||||
target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE device_batched_gemm_reduce_instance)
|
||||
set(target 1)
|
||||
if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
|
||||
add_test_executable(test_batched_gemm_reduce_fp16 batched_gemm_reduce_fp16.cpp)
|
||||
target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE utility)
|
||||
target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE device_batched_gemm_reduce_instance)
|
||||
set(target 1)
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
@@ -2,10 +2,12 @@ list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
|
||||
set(target 0)
|
||||
foreach(gpu IN LISTS GPU_TARGETS)
|
||||
if(gpu IN_LIST gpu_list AND target EQUAL 0)
|
||||
add_custom_target(test_batched_gemm_softmax_gemm)
|
||||
add_gtest_executable(test_batched_gemm_softmax_gemm_fp16 test_batched_gemm_softmax_gemm_fp16.cpp)
|
||||
target_link_libraries(test_batched_gemm_softmax_gemm_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_instance)
|
||||
add_dependencies(test_batched_gemm_softmax_gemm test_batched_gemm_softmax_gemm_fp16)
|
||||
set(target 1)
|
||||
if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
|
||||
add_custom_target(test_batched_gemm_softmax_gemm)
|
||||
add_gtest_executable(test_batched_gemm_softmax_gemm_fp16 test_batched_gemm_softmax_gemm_fp16.cpp)
|
||||
target_link_libraries(test_batched_gemm_softmax_gemm_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_instance)
|
||||
add_dependencies(test_batched_gemm_softmax_gemm test_batched_gemm_softmax_gemm_fp16)
|
||||
set(target 1)
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
@@ -2,21 +2,25 @@ list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
|
||||
set(target 0)
|
||||
foreach(gpu IN LISTS GPU_TARGETS)
|
||||
if(gpu IN_LIST gpu_list AND target EQUAL 0)
|
||||
add_custom_target(test_batched_gemm_softmax_gemm_permute)
|
||||
|
||||
add_gtest_executable(test_batched_gemm_softmax_gemm_permute_fp16 test_batched_gemm_softmax_gemm_permute_fp16.cpp)
|
||||
add_gtest_executable(test_batched_gemm_softmax_gemm_permute_bf16 test_batched_gemm_softmax_gemm_permute_bf16.cpp)
|
||||
target_link_libraries(test_batched_gemm_softmax_gemm_permute_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance)
|
||||
target_link_libraries(test_batched_gemm_softmax_gemm_permute_bf16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance)
|
||||
add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_softmax_gemm_permute_fp16)
|
||||
add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_softmax_gemm_permute_bf16)
|
||||
|
||||
add_gtest_executable(test_batched_gemm_bias_softmax_gemm_permute_fp16 test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp)
|
||||
add_gtest_executable(test_batched_gemm_bias_softmax_gemm_permute_bf16 test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp)
|
||||
target_link_libraries(test_batched_gemm_bias_softmax_gemm_permute_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance)
|
||||
target_link_libraries(test_batched_gemm_bias_softmax_gemm_permute_bf16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance)
|
||||
add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_bias_softmax_gemm_permute_fp16)
|
||||
add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_bias_softmax_gemm_permute_bf16)
|
||||
if(DTYPES MATCHES "fp16" OR DTYPES MATCHES "bf16" OR NOT DEFINED DTYPES)
|
||||
add_custom_target(test_batched_gemm_softmax_gemm_permute)
|
||||
endif()
|
||||
if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
|
||||
add_gtest_executable(test_batched_gemm_softmax_gemm_permute_fp16 test_batched_gemm_softmax_gemm_permute_fp16.cpp)
|
||||
add_gtest_executable(test_batched_gemm_bias_softmax_gemm_permute_fp16 test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp)
|
||||
target_link_libraries(test_batched_gemm_softmax_gemm_permute_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance)
|
||||
target_link_libraries(test_batched_gemm_bias_softmax_gemm_permute_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance)
|
||||
add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_softmax_gemm_permute_fp16)
|
||||
add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_bias_softmax_gemm_permute_fp16)
|
||||
endif()
|
||||
if(DTYPES MATCHES "bf16" OR NOT DEFINED DTYPES)
|
||||
add_gtest_executable(test_batched_gemm_softmax_gemm_permute_bf16 test_batched_gemm_softmax_gemm_permute_bf16.cpp)
|
||||
add_gtest_executable(test_batched_gemm_bias_softmax_gemm_permute_bf16 test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp)
|
||||
target_link_libraries(test_batched_gemm_softmax_gemm_permute_bf16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance)
|
||||
target_link_libraries(test_batched_gemm_bias_softmax_gemm_permute_bf16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance)
|
||||
add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_softmax_gemm_permute_bf16)
|
||||
add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_bias_softmax_gemm_permute_bf16)
|
||||
endif()
|
||||
set(target 1)
|
||||
endif()
|
||||
endforeach()
|
||||
@@ -1,7 +1,6 @@
|
||||
add_custom_target(test_elementwise_normalization)
|
||||
|
||||
add_gtest_executable(test_elementwise_layernorm_fp16 test_elementwise_layernorm_fp16.cpp)
|
||||
|
||||
target_link_libraries(test_elementwise_layernorm_fp16 PRIVATE utility device_elementwise_normalization_instance)
|
||||
|
||||
add_dependencies(test_elementwise_normalization test_elementwise_layernorm_fp16)
|
||||
if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
|
||||
add_custom_target(test_elementwise_normalization)
|
||||
add_gtest_executable(test_elementwise_layernorm_fp16 test_elementwise_layernorm_fp16.cpp)
|
||||
target_link_libraries(test_elementwise_layernorm_fp16 PRIVATE utility device_elementwise_normalization_instance)
|
||||
add_dependencies(test_elementwise_normalization test_elementwise_layernorm_fp16)
|
||||
endif()
|
||||
@@ -2,10 +2,12 @@ list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
|
||||
set(target 0)
|
||||
foreach(gpu IN LISTS GPU_TARGETS)
|
||||
if(gpu IN_LIST gpu_list AND target EQUAL 0)
|
||||
if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
|
||||
add_custom_target(test_gemm_layernorm)
|
||||
add_gtest_executable(test_gemm_add_relu_add_layernorm_fp16 test_gemm_add_relu_add_layernorm_fp16.cpp)
|
||||
target_link_libraries(test_gemm_add_relu_add_layernorm_fp16 PRIVATE utility device_gemm_add_relu_add_layernorm_instance)
|
||||
add_dependencies(test_gemm_layernorm test_gemm_add_relu_add_layernorm_fp16)
|
||||
set(target 1)
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
add_test_executable(test_gemm_reduce_fp16 gemm_reduce_fp16.cpp)
|
||||
target_link_libraries(test_gemm_reduce_fp16 PRIVATE utility)
|
||||
target_link_libraries(test_gemm_reduce_fp16 PRIVATE device_gemm_reduce_instance)
|
||||
if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
|
||||
add_test_executable(test_gemm_reduce_fp16 gemm_reduce_fp16.cpp)
|
||||
target_link_libraries(test_gemm_reduce_fp16 PRIVATE utility)
|
||||
target_link_libraries(test_gemm_reduce_fp16 PRIVATE device_gemm_reduce_instance)
|
||||
endif()
|
||||
@@ -1,3 +1,4 @@
|
||||
if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
|
||||
list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
|
||||
set(target 0)
|
||||
foreach(gpu IN LISTS GPU_TARGETS)
|
||||
@@ -12,3 +13,4 @@ foreach(gpu IN LISTS GPU_TARGETS)
|
||||
set(target 1)
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
@@ -1,16 +1,19 @@
|
||||
add_custom_target(test_normalization)
|
||||
|
||||
add_gtest_executable(test_layernorm2d_fp32 test_layernorm2d_fp32.cpp)
|
||||
add_gtest_executable(test_layernorm2d_fp16 test_layernorm2d_fp16.cpp)
|
||||
add_gtest_executable(test_groupnorm_fp16 test_groupnorm_fp16.cpp)
|
||||
add_gtest_executable(test_groupnorm_fp32 test_groupnorm_fp32.cpp)
|
||||
|
||||
target_link_libraries(test_layernorm2d_fp32 PRIVATE utility device_normalization_instance)
|
||||
target_link_libraries(test_layernorm2d_fp16 PRIVATE utility device_normalization_instance)
|
||||
target_link_libraries(test_groupnorm_fp16 PRIVATE utility device_normalization_instance)
|
||||
target_link_libraries(test_groupnorm_fp32 PRIVATE utility device_normalization_instance)
|
||||
|
||||
add_dependencies(test_normalization test_layernorm2d_fp32)
|
||||
add_dependencies(test_normalization test_layernorm2d_fp16)
|
||||
add_dependencies(test_normalization test_groupnorm_fp16)
|
||||
add_dependencies(test_normalization test_groupnorm_fp32)
|
||||
if(DTYPES MATCHES "fp16" OR DTYPES MATCHES "fp32" OR NOT DEFINED DTYPES)
|
||||
add_custom_target(test_normalization)
|
||||
endif()
|
||||
if(DTYPES MATCHES "fp32" OR NOT DEFINED DTYPES)
|
||||
add_gtest_executable(test_layernorm2d_fp32 test_layernorm2d_fp32.cpp)
|
||||
add_gtest_executable(test_groupnorm_fp32 test_groupnorm_fp32.cpp)
|
||||
target_link_libraries(test_layernorm2d_fp32 PRIVATE utility device_normalization_instance)
|
||||
target_link_libraries(test_groupnorm_fp32 PRIVATE utility device_normalization_instance)
|
||||
add_dependencies(test_normalization test_layernorm2d_fp32)
|
||||
add_dependencies(test_normalization test_groupnorm_fp32)
|
||||
endif()
|
||||
if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
|
||||
add_gtest_executable(test_layernorm2d_fp16 test_layernorm2d_fp16.cpp)
|
||||
add_gtest_executable(test_groupnorm_fp16 test_groupnorm_fp16.cpp)
|
||||
target_link_libraries(test_layernorm2d_fp16 PRIVATE utility device_normalization_instance)
|
||||
target_link_libraries(test_groupnorm_fp16 PRIVATE utility device_normalization_instance)
|
||||
add_dependencies(test_normalization test_layernorm2d_fp16)
|
||||
add_dependencies(test_normalization test_groupnorm_fp16)
|
||||
endif()
|
||||
|
||||
@@ -41,8 +41,12 @@ class TestAvgPool2dFwd : public ::testing::Test
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef __fp16__
|
||||
using KernelTypes =
|
||||
::testing::Types<std::tuple<F16, F16, F32, I32>, std::tuple<F32, F32, F32, I32>>;
|
||||
#else
|
||||
using KernelTypes = ::testing::Types<std::tuple<F32, F32, F32, I32>>;
|
||||
#endif
|
||||
|
||||
TYPED_TEST_SUITE(TestAvgPool2dFwd, KernelTypes);
|
||||
TYPED_TEST(TestAvgPool2dFwd, Test_Pool)
|
||||
|
||||
@@ -40,10 +40,12 @@ class TestAvgPool3dFwd : public ::testing::Test
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef __fp16__
|
||||
using KernelTypes =
|
||||
::testing::Types<std::tuple<F16, F16, F32, I32>, std::tuple<F32, F32, F32, I32>>;
|
||||
|
||||
#else
|
||||
using KernelTypes = ::testing::Types<std::tuple<F32, F32, F32, I32>>;
|
||||
#endif
|
||||
TYPED_TEST_SUITE(TestAvgPool3dFwd, KernelTypes);
|
||||
TYPED_TEST(TestAvgPool3dFwd, Test_Pool)
|
||||
{
|
||||
|
||||
@@ -59,10 +59,12 @@ class TestMaxPool2dFwd : public ::testing::Test
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef __fp16__
|
||||
using KernelTypes =
|
||||
::testing::Types<std::tuple<F16, F16, F16, I32>, std::tuple<F32, F32, F32, I32>>;
|
||||
|
||||
#else
|
||||
using KernelTypes = ::testing::Types<std::tuple<F32, F32, F32, I32>>;
|
||||
#endif
|
||||
TYPED_TEST_SUITE(TestMaxPool2dFwd, KernelTypes);
|
||||
TYPED_TEST(TestMaxPool2dFwd, Test_Pool)
|
||||
{
|
||||
|
||||
@@ -60,8 +60,12 @@ class TestMaxPool3dFwd : public ::testing::Test
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef __fp16__
|
||||
using KernelTypes =
|
||||
::testing::Types<std::tuple<F16, F16, F16, I32>, std::tuple<F32, F32, F32, I32>>;
|
||||
::testing::Types<std::tuple<F16, F16, F32, I32>, std::tuple<F32, F32, F32, I32>>;
|
||||
#else
|
||||
using KernelTypes = ::testing::Types<std::tuple<F32, F32, F32, I32>>;
|
||||
#endif
|
||||
|
||||
TYPED_TEST_SUITE(TestMaxPool3dFwd, KernelTypes);
|
||||
TYPED_TEST(TestMaxPool3dFwd, Test_Pool)
|
||||
|
||||
@@ -10,8 +10,9 @@
|
||||
|
||||
template <ck::index_t N>
|
||||
using I = ck::Number<N>;
|
||||
|
||||
#ifdef __fp16__
|
||||
using F16 = ck::half_t;
|
||||
#endif
|
||||
using F32 = float;
|
||||
|
||||
template <typename Tuple>
|
||||
@@ -22,7 +23,9 @@ class TestSoftmax : public ck::TestSoftmax<Tuple>
|
||||
// clang-format off
|
||||
using KernelTypes = ::testing::Types<
|
||||
// InDataType, AccDataType, OutDataType, Rank
|
||||
#ifdef __fp16__
|
||||
std::tuple< F16, F32, F16, I<3>>,
|
||||
#endif
|
||||
std::tuple< F32, F32, F32, I<3>>
|
||||
>;
|
||||
// clang-format on
|
||||
|
||||
@@ -10,8 +10,9 @@
|
||||
|
||||
template <ck::index_t N>
|
||||
using I = ck::Number<N>;
|
||||
|
||||
#ifdef __fp16__
|
||||
using F16 = ck::half_t;
|
||||
#endif
|
||||
using F32 = float;
|
||||
|
||||
template <typename Tuple>
|
||||
@@ -22,7 +23,9 @@ class TestSoftmax : public ck::TestSoftmax<Tuple>
|
||||
// clang-format off
|
||||
using KernelTypes = ::testing::Types<
|
||||
// InDataType, AccDataType, OutDataType, Rank
|
||||
#ifdef __fp16__
|
||||
std::tuple< F16, F32, F16, I<4>>,
|
||||
#endif
|
||||
std::tuple< F32, F32, F32, I<4>>
|
||||
>;
|
||||
// clang-format on
|
||||
|
||||
Reference in New Issue
Block a user