diff --git a/example/69_gemm_add_relu/CMakeLists.txt b/example/69_gemm_add_relu/CMakeLists.txt index 936e9acea3..fe9d783755 100644 --- a/example/69_gemm_add_relu/CMakeLists.txt +++ b/example/69_gemm_add_relu/CMakeLists.txt @@ -1,9 +1,7 @@ add_custom_target(example_gemm_add_relu_xdl) -add_library(example_gemm_add_relu_xdl_fp16 gemm_add_relu_xdl_fp16.cpp) add_example_executable(example_gemm_add_relu_xdl_fp16 gemm_add_relu_xdl_fp16.cpp) -add_library(example_gemm_add_relu_xdl_bf16 gemm_add_relu_xdl_bf16.cpp) add_example_executable(example_gemm_add_relu_xdl_bf16 gemm_add_relu_xdl_bf16.cpp) @@ -12,8 +10,6 @@ add_example_executable(example_gemm_add_relu_wmma_bf16 gemm_add_relu_wmma_bf16.c add_example_executable(example_gemm_add_relu_wmma_fp16 gemm_add_relu_wmma_fp16.cpp) -add_example_executable(example_gemm_add_relu_wmma_v3_fp16 gemm_add_relu_wmma_v3_fp16.cpp) -add_example_executable(example_gemm_add_relu_wmma_v3_bf16 gemm_add_relu_wmma_v3_bf16.cpp) diff --git a/example/69_gemm_add_relu/gemm_add_relu_wmma_v3_bf16.cpp b/example/69_gemm_add_relu/gemm_add_relu_wmma_bf16.cpp similarity index 97% rename from example/69_gemm_add_relu/gemm_add_relu_wmma_v3_bf16.cpp rename to example/69_gemm_add_relu/gemm_add_relu_wmma_bf16.cpp index 85b2a65b20..c91f2220aa 100644 --- a/example/69_gemm_add_relu/gemm_add_relu_wmma_v3_bf16.cpp +++ b/example/69_gemm_add_relu/gemm_add_relu_wmma_bf16.cpp @@ -73,6 +73,6 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceGemmMultipleD_Wmma_ // clang-format on -#include "run_gemm_add_relu_example_v3.inc" +#include "run_gemm_add_relu_example.inc" int main(int argc, char* argv[]) { return !run_gemm_add_relu_example(argc, argv); } diff --git a/example/69_gemm_add_relu/gemm_add_relu_wmma_v3_fp16.cpp b/example/69_gemm_add_relu/gemm_add_relu_wmma_fp16.cpp similarity index 96% rename from example/69_gemm_add_relu/gemm_add_relu_wmma_v3_fp16.cpp rename to example/69_gemm_add_relu/gemm_add_relu_wmma_fp16.cpp index 1a172b20a6..1d7febe66e 100644 --- a/example/69_gemm_add_relu/gemm_add_relu_wmma_v3_fp16.cpp +++ b/example/69_gemm_add_relu/gemm_add_relu_wmma_fp16.cpp @@ -71,6 +71,6 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceGemmMultipleD_Wmma_ // clang-format on -#include "run_gemm_add_relu_example_v3.inc" +#include "run_gemm_add_relu_example.inc" int main(int argc, char* argv[]) { return !run_gemm_add_relu_example(argc, argv); } diff --git a/example/69_gemm_add_relu/run_gemm_add_relu_example_v3.inc b/example/69_gemm_add_relu/run_gemm_add_relu_example.inc similarity index 100% rename from example/69_gemm_add_relu/run_gemm_add_relu_example_v3.inc rename to example/69_gemm_add_relu/run_gemm_add_relu_example.inc diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu.hpp index 2cc7cab5e6..0792e3eb89 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu.hpp +++ b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu.hpp @@ -45,30 +45,30 @@ void add_device_gemm_add_relu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instan #elif defined(CK_USE_WMMA) void add_device_gemm_add_relu_wmma_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instances( - std::vector>>&); + std::vector>>&); void add_device_gemm_add_relu_wmma_c_shuffle_bf16_bf16_bf16_bf16_mk_kn_mn_mn_instances( - std::vector>>&); + std::vector>>&); #endif // GEMM + Add + Relu @@ -137,7 +137,7 @@ struct DeviceOperationInstanceFactory< #endif #elif defined(CK_USE_WMMA) - // For wmma ADataType must be same as BDatatype. + #if defined(CK_ENABLE_FP16) if constexpr(is_same_v && is_same_v && is_same_v && is_same_v) @@ -151,7 +151,6 @@ struct DeviceOperationInstanceFactory< } #endif -// For wmma ADataType must be same as BDatatype. #if defined(CK_ENABLE_BF16) if constexpr(is_same_v && is_same_v && is_same_v && is_same_v) diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_relu/CMakeLists.txt index 8fb7f7fb72..28e0ccb33d 100644 --- a/library/src/tensor_operation_instance/gpu/gemm_add_relu/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/gemm_add_relu/CMakeLists.txt @@ -4,8 +4,5 @@ add_instance_library(device_gemm_add_relu_instance device_gemm_add_relu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp device_gemm_add_relu_wmma_c_shuffle_bf16_bf16_bf16_bf16_mk_kn_mn_mn_instance.cpp device_gemm_add_relu_wmma_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp - device_gemm_add_relu_wmma_c_shuffle_v3_bf16_bf16_bf16_bf16_mk_kn_mn_mn_instance.cpp - device_gemm_add_relu_wmma_c_shuffle_v3_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp ) - diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu/device_gemm_add_relu_wmma_c_shuffle_v3_bf16_bf16_bf16_bf16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu/device_gemm_add_relu_wmma_c_shuffle_bf16_bf16_bf16_bf16_mk_kn_mn_mn_instance.cpp similarity index 100% rename from library/src/tensor_operation_instance/gpu/gemm_add_relu/device_gemm_add_relu_wmma_c_shuffle_v3_bf16_bf16_bf16_bf16_mk_kn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_relu/device_gemm_add_relu_wmma_c_shuffle_bf16_bf16_bf16_bf16_mk_kn_mn_mn_instance.cpp diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu/device_gemm_add_relu_wmma_c_shuffle_v3_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu/device_gemm_add_relu_wmma_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp similarity index 100% rename from library/src/tensor_operation_instance/gpu/gemm_add_relu/device_gemm_add_relu_wmma_c_shuffle_v3_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_relu/device_gemm_add_relu_wmma_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp