From fb1cbf025b33945257b36f065a426d9dffc9fa03 Mon Sep 17 00:00:00 2001 From: cloudhan Date: Sun, 14 Aug 2022 01:17:58 +0800 Subject: [PATCH] Change all device operations to use add_instance_library (#338) * Change all device operations to use add_instance_library to avoid duplicated cmake configuration. * update DeviceMem Co-authored-by: Chao Liu --- .../ck/library/utility/device_memory.hpp | 31 ++++--- .../gpu/CMakeLists.txt | 1 + .../gpu/batched_gemm/CMakeLists.txt | 42 ++++----- .../gpu/batched_gemm_gemm/CMakeLists.txt | 7 +- .../gpu/batched_gemm_reduce/CMakeLists.txt | 7 +- .../batched_gemm_softmax_gemm/CMakeLists.txt | 6 +- .../gpu/contraction_bilinear/CMakeLists.txt | 7 +- .../gpu/contraction_scale/CMakeLists.txt | 7 +- .../gpu/conv1d_bwd_data/CMakeLists.txt | 18 ++-- .../gpu/conv1d_bwd_weight/CMakeLists.txt | 16 +--- .../gpu/conv2d_bwd_data/CMakeLists.txt | 16 ++-- .../gpu/conv2d_bwd_weight/CMakeLists.txt | 15 +--- .../gpu/conv2d_fwd/CMakeLists.txt | 17 ++-- .../gpu/conv2d_fwd_bias_relu/CMakeLists.txt | 9 +- .../conv2d_fwd_bias_relu_add/CMakeLists.txt | 8 +- .../gpu/conv3d_bwd_data/CMakeLists.txt | 18 ++-- .../gpu/conv3d_bwd_weight/CMakeLists.txt | 16 +--- .../gpu/elementwise/CMakeLists.txt | 9 +- .../gpu/gemm/CMakeLists.txt | 89 +++++++++---------- .../gpu/gemm_add_add_fastgelu/CMakeLists.txt | 18 ++-- .../gpu/gemm_bias_add_reduce/CMakeLists.txt | 9 +- .../gpu/gemm_bilinear/CMakeLists.txt | 16 ++-- .../gpu/gemm_reduce/CMakeLists.txt | 6 +- .../gpu/gemm_splitk/CMakeLists.txt | 23 ++--- .../gpu/grouped_conv1d_fwd/CMakeLists.txt | 16 ++-- .../gpu/grouped_conv2d_fwd/CMakeLists.txt | 18 ++-- .../gpu/grouped_conv3d_fwd/CMakeLists.txt | 16 ++-- .../gpu/grouped_gemm/CMakeLists.txt | 19 ++-- .../gpu/normalization/CMakeLists.txt | 8 +- .../gpu/reduce/CMakeLists.txt | 50 +++++------ library/src/utility/device_memory.cpp | 10 +-- 31 files changed, 190 insertions(+), 358 deletions(-) diff --git a/library/include/ck/library/utility/device_memory.hpp b/library/include/ck/library/utility/device_memory.hpp index 5667db7fc7..3c4ece4406 100644 --- a/library/include/ck/library/utility/device_memory.hpp +++ b/library/include/ck/library/utility/device_memory.hpp @@ -18,23 +18,26 @@ struct DeviceMem { DeviceMem() = delete; DeviceMem(std::size_t mem_size); - void* GetDeviceBuffer(); - std::size_t GetBufferSize(); - void ToDevice(const void* p); - void FromDevice(void* p); - void SetZero(); + void* GetDeviceBuffer() const; + std::size_t GetBufferSize() const; + void ToDevice(const void* p) const; + void FromDevice(void* p) const; + void SetZero() const; template - void SetValue(T x) - { - if(mMemSize % sizeof(T) != 0) - { - throw std::runtime_error("wrong! not entire DeviceMem will be set"); - } - - set_buffer_value<<<1, 1024>>>(static_cast(mpDeviceBuf), x, mMemSize / sizeof(T)); - } + void SetValue(T x) const; ~DeviceMem(); void* mpDeviceBuf; std::size_t mMemSize; }; + +template +void DeviceMem::SetValue(T x) const +{ + if(mMemSize % sizeof(T) != 0) + { + throw std::runtime_error("wrong! not entire DeviceMem will be set"); + } + + set_buffer_value<<<1, 1024>>>(static_cast(mpDeviceBuf), x, mMemSize / sizeof(T)); +} diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt index 74fcc47206..6f3f900b8a 100644 --- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt @@ -3,6 +3,7 @@ function(add_instance_library INSTANCE_NAME) add_library(${INSTANCE_NAME} OBJECT ${ARGN}) target_compile_features(${INSTANCE_NAME} PUBLIC) set_target_properties(${INSTANCE_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) + clang_tidy_check(${INSTANCE_NAME}) endfunction(add_instance_library INSTANCE_NAME) add_subdirectory(gemm) diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt index 016c85f673..0f2a739199 100644 --- a/library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt @@ -1,26 +1,18 @@ -#device_batched_gemm_instance -set(DEVICE_BATCHED_GEMM_INSTANCE_SOURCE - device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp; - device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp; - device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp; - device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp; - device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp; - device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp; - device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp; - device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp; - device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp; - device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp; - device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp; - device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp; - device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp; - device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp; - device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp; - device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp; +add_instance_library(device_batched_gemm_instance + device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp + device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp + device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp + device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp + device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp + device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp + device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp + device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp + device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp + device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp + device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp + device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp + device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp + device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp + device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp + device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp ) - -add_library(device_batched_gemm_instance OBJECT ${DEVICE_BATCHED_GEMM_INSTANCE_SOURCE}) -# target_compile_features(device_batched_gemm_instance PUBLIC) -set_target_properties(device_batched_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) -# install(TARGETS device_batched_gemm_instance LIBRARY DESTINATION lib) - -clang_tidy_check(device_batched_gemm_instance) diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/CMakeLists.txt index 34e7b6b9ab..e0968a99ac 100644 --- a/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/CMakeLists.txt @@ -1,8 +1,3 @@ -set(DEVICE_BATCHED_GEMM_GEMM_INSTANCE_SOURCE +add_instance_library(device_batched_gemm_gemm_instance device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp ) - -add_instance_library(device_batched_gemm_gemm_instance OBJECT ${DEVICE_BATCHED_GEMM_GEMM_INSTANCE_SOURCE}) -target_compile_features(device_batched_gemm_gemm_instance PUBLIC) -set_target_properties(device_batched_gemm_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) -clang_tidy_check(device_batched_gemm_gemm_instance) \ No newline at end of file diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt index 0606df01f1..db3719cff8 100644 --- a/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt @@ -1,12 +1,7 @@ -set(DEVICE_BATCHED_GEMM_REDUCE_INSTANCE_SOURCE +add_instance_library(device_batched_gemm_reduce_instance device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp ) -add_instance_library(device_batched_gemm_reduce_instance OBJECT ${DEVICE_BATCHED_GEMM_REDUCE_INSTANCE_SOURCE}) -target_compile_features(device_batched_gemm_reduce_instance PUBLIC) -set_target_properties(device_batched_gemm_reduce_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) -clang_tidy_check(device_batched_gemm_reduce_instance) - diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/CMakeLists.txt index 5e14c5ebb2..29fce56610 100644 --- a/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/CMakeLists.txt @@ -1,8 +1,4 @@ -set(DEVICE_BATCHED_GEMM_SOFTMAX_GEMM_INSTANCE_SOURCE +add_instance_library(device_batched_gemm_softmax_gemm_instance device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp ) -add_instance_library(device_batched_gemm_softmax_gemm_instance OBJECT ${DEVICE_BATCHED_GEMM_SOFTMAX_GEMM_INSTANCE_SOURCE}) -target_compile_features(device_batched_gemm_softmax_gemm_instance PUBLIC) -set_target_properties(device_batched_gemm_softmax_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) -clang_tidy_check(device_batched_gemm_softmax_gemm_instance) \ No newline at end of file diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeLists.txt index fb38c645eb..ffd6a6a7be 100644 --- a/library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeLists.txt @@ -1,12 +1,7 @@ -# device_contraction_bilinear_instance -set(DEVICE_CONTRACTION_BILINEAR_INSTANCE_SOURCE +add_instance_library(device_contraction_bilinear_instance device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp ) -add_library(device_contraction_bilinear_instance OBJECT ${DEVICE_CONTRACTION_BILINEAR_INSTANCE_SOURCE}) -set_target_properties(device_contraction_bilinear_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) - -clang_tidy_check(device_contraction_bilinear_instance) diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt index 32806757a5..7ad6605486 100644 --- a/library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt @@ -1,12 +1,7 @@ -# device_contraction_scale_instance -set(DEVICE_CONTRACTION_SCALE_INSTANCE_SOURCE +add_instance_library(device_contraction_scale_instance device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp ) -add_library(device_contraction_scale_instance OBJECT ${DEVICE_CONTRACTION_SCALE_INSTANCE_SOURCE}) -set_target_properties(device_contraction_scale_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) - -clang_tidy_check(device_contraction_scale_instance) diff --git a/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt index fc72bed39f..75a3670761 100644 --- a/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt @@ -1,14 +1,6 @@ -# device_conv1d_bwd_data_instance -set(DEVICE_CONV1D_BWD_DATA_INSTANCE_SOURCE - device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp; - device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp; - device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp; - device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp; +add_instance_library(device_conv1d_bwd_data_instance + device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp + device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp + device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp + device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp ) - -add_library(device_conv1d_bwd_data_instance OBJECT ${DEVICE_CONV1D_BWD_DATA_INSTANCE_SOURCE}) -target_compile_features(device_conv1d_bwd_data_instance PUBLIC) -set_target_properties(device_conv1d_bwd_data_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) -rocm_install(TARGETS device_conv1d_bwd_data_instance) - -clang_tidy_check(device_conv1d_bwd_data_instance) diff --git a/library/src/tensor_operation_instance/gpu/conv1d_bwd_weight/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv1d_bwd_weight/CMakeLists.txt index 5b80510899..86fd564ea3 100644 --- a/library/src/tensor_operation_instance/gpu/conv1d_bwd_weight/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/conv1d_bwd_weight/CMakeLists.txt @@ -1,13 +1,5 @@ -#device_conv1d_bwd_weight_instance -set(DEVICE_CONV1D_BWD_WEIGHT_INSTANCE_SOURCE - device_conv1d_bwd_weight_xdl_nwc_kxc_nwk_f16_instance.cpp; - device_conv1d_bwd_weight_xdl_nwc_kxc_nwk_f32_instance.cpp; - device_conv1d_bwd_weight_xdl_nwc_kxc_nwk_bf16_instance.cpp; +add_instance_library(device_conv1d_bwd_weight_instance + device_conv1d_bwd_weight_xdl_nwc_kxc_nwk_f16_instance.cpp + device_conv1d_bwd_weight_xdl_nwc_kxc_nwk_f32_instance.cpp + device_conv1d_bwd_weight_xdl_nwc_kxc_nwk_bf16_instance.cpp ) - -add_library(device_conv1d_bwd_weight_instance OBJECT ${DEVICE_CONV1D_BWD_WEIGHT_INSTANCE_SOURCE}) -target_compile_features(device_conv1d_bwd_weight_instance PUBLIC) -set_target_properties(device_conv1d_bwd_weight_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) -rocm_install(TARGETS device_conv1d_bwd_weight_instance) - -clang_tidy_check(device_conv1d_bwd_weight_instance) diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt index d7882a7d8b..a443492f6e 100644 --- a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt @@ -1,12 +1,6 @@ -# device_conv2d_bwd_data_instance -set(DEVICE_CONV2D_BWD_DATA_INSTANCE_SOURCE - device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp; - device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp; - device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp; - device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp; +add_instance_library(device_conv2d_bwd_data_instance + device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp + device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp + device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp + device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp ) - -add_library(device_conv2d_bwd_data_instance OBJECT ${DEVICE_CONV2D_BWD_DATA_INSTANCE_SOURCE}) -set_target_properties(device_conv2d_bwd_data_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) - -clang_tidy_check(device_conv2d_bwd_data_instance) diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_weight/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_bwd_weight/CMakeLists.txt index be60dc2aab..4e6bfa7fb7 100644 --- a/library/src/tensor_operation_instance/gpu/conv2d_bwd_weight/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/conv2d_bwd_weight/CMakeLists.txt @@ -1,13 +1,6 @@ -#device_conv2d_bwd_weight_instance -set(DEVICE_CONV2D_BWD_WEIGHT_INSTANCE_SOURCE - device_conv2d_bwd_weight_xdl_nhwc_kyxc_nhwk_f16_instance.cpp; - device_conv2d_bwd_weight_xdl_nhwc_kyxc_nhwk_f32_instance.cpp; - device_conv2d_bwd_weight_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp; +add_instance_library(device_conv2d_bwd_weight_instance + device_conv2d_bwd_weight_xdl_nhwc_kyxc_nhwk_f16_instance.cpp + device_conv2d_bwd_weight_xdl_nhwc_kyxc_nhwk_f32_instance.cpp + device_conv2d_bwd_weight_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp ) -add_library(device_conv2d_bwd_weight_instance OBJECT ${DEVICE_CONV2D_BWD_WEIGHT_INSTANCE_SOURCE}) -target_compile_features(device_conv2d_bwd_weight_instance PUBLIC) -set_target_properties(device_conv2d_bwd_weight_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) -rocm_install(TARGETS device_conv2d_bwd_weight_instance) - -clang_tidy_check(device_conv2d_bwd_weight_instance) diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt index 8d21aa2bc3..5b646852fc 100644 --- a/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt @@ -1,12 +1,7 @@ -# device_conv2d_fwd_instance -set(DEVICE_CONV2D_FWD_INSTANCE_SOURCE - device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp; - device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp; - device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp; - device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp; - device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp; +add_instance_library(device_conv2d_fwd_instance + device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp + device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp + device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp + device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp + device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp ) - -add_library(device_conv2d_fwd_instance OBJECT ${DEVICE_CONV2D_FWD_INSTANCE_SOURCE}) -set_target_properties(device_conv2d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) -clang_tidy_check(device_conv2d_fwd_instance) diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt index ad66c73bf8..670cd94fc9 100644 --- a/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt @@ -1,8 +1,3 @@ -# device_conv2d_fwd_bias_relu_instance -set(DEVICE_CONV2D_FWD_BIAS_RELU_INSTANCE_SOURCE - device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp; +add_instance_library(device_conv2d_fwd_bias_relu_instance + device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp ) -add_library(device_conv2d_fwd_bias_relu_instance OBJECT ${DEVICE_CONV2D_FWD_BIAS_RELU_INSTANCE_SOURCE}) -set_target_properties(device_conv2d_fwd_bias_relu_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) - -clang_tidy_check(device_conv2d_fwd_bias_relu_instance) diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt index 36b1f6c153..68d5f582fd 100644 --- a/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt @@ -1,8 +1,4 @@ -# device_conv2d_fwd_bias_relu_add_instance -set(DEVICE_CONV2D_FWD_BIAS_RELU_ADD_INSTANCE_SOURCE - device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp; +add_instance_library(device_conv2d_fwd_bias_relu_add_instance + device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp ) -add_library(device_conv2d_fwd_bias_relu_add_instance OBJECT ${DEVICE_CONV2D_FWD_BIAS_RELU_ADD_INSTANCE_SOURCE}) -set_target_properties(device_conv2d_fwd_bias_relu_add_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) -clang_tidy_check(device_conv2d_fwd_bias_relu_add_instance) diff --git a/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt index 215d4f7e86..db92208fd7 100644 --- a/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt @@ -1,14 +1,6 @@ -# device_conv3d_bwd_data_instance -set(DEVICE_CONV3D_BWD_DATA_INSTANCE_SOURCE - device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp; - device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp; - device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp; - device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp; +add_instance_library(device_conv3d_bwd_data_instance + device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp + device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp + device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp + device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp ) - -add_library(device_conv3d_bwd_data_instance OBJECT ${DEVICE_CONV3D_BWD_DATA_INSTANCE_SOURCE}) -target_compile_features(device_conv3d_bwd_data_instance PUBLIC) -set_target_properties(device_conv3d_bwd_data_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) -rocm_install(TARGETS device_conv3d_bwd_data_instance) - -clang_tidy_check(device_conv3d_bwd_data_instance) diff --git a/library/src/tensor_operation_instance/gpu/conv3d_bwd_weight/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv3d_bwd_weight/CMakeLists.txt index dfa03ea74a..931e6d7f32 100644 --- a/library/src/tensor_operation_instance/gpu/conv3d_bwd_weight/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/conv3d_bwd_weight/CMakeLists.txt @@ -1,13 +1,5 @@ -#device_conv3d_bwd_weight_instance -set(DEVICE_CONV3D_BWD_WEIGHT_INSTANCE_SOURCE - device_conv3d_bwd_weight_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp; - device_conv3d_bwd_weight_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp; - device_conv3d_bwd_weight_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp; +add_instance_library(device_conv3d_bwd_weight_instance + device_conv3d_bwd_weight_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp + device_conv3d_bwd_weight_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp + device_conv3d_bwd_weight_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp ) - -add_library(device_conv3d_bwd_weight_instance OBJECT ${DEVICE_CONV3D_BWD_WEIGHT_INSTANCE_SOURCE}) -target_compile_features(device_conv3d_bwd_weight_instance PUBLIC) -set_target_properties(device_conv3d_bwd_weight_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) -rocm_install(TARGETS device_conv3d_bwd_weight_instance) - -clang_tidy_check(device_conv3d_bwd_weight_instance) diff --git a/library/src/tensor_operation_instance/gpu/elementwise/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/elementwise/CMakeLists.txt index 465ba4e984..47516b4162 100644 --- a/library/src/tensor_operation_instance/gpu/elementwise/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/elementwise/CMakeLists.txt @@ -1,10 +1,3 @@ -set(DEVICE_ELEMENTWISE_INSTANCE_SOURCE +add_instance_library(device_elementwise_instance device_normalize_instance.cpp ) - -add_instance_library(device_elementwise_instance ${DEVICE_ELEMENTWISE_INSTANCE_SOURCE}) - -target_compile_features(device_elementwise_instance PUBLIC) -set_target_properties(device_elementwise_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) - -clang_tidy_check(device_elementwise_instance) diff --git a/library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt index ce66b56a3e..e20d592c84 100644 --- a/library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt @@ -1,48 +1,43 @@ -set(DEVICE_GEMM_INSTANCE_SOURCE - device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp; - device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp; - device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp; - device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp; - device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp; - device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp; - device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp; - device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp; - device_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp; - device_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp; - device_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp; - device_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp; - device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp; - device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp; - device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp; - device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp; - device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp; - device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp; - device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp; - device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp; - device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp; - device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp; - device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp; - device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp; - device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp; - device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp; - device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp; - device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp; - device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp; - device_gemm_dl_f32_f32_f32_mk_kn_mn_instance.cpp; - device_gemm_dl_f32_f32_f32_mk_nk_mn_instance.cpp; - device_gemm_dl_f32_f32_f32_km_kn_mn_instance.cpp; - device_gemm_dl_f32_f32_f32_km_nk_mn_instance.cpp; - device_gemm_dl_f16_f16_f16_mk_kn_mn_instance.cpp; - device_gemm_dl_f16_f16_f16_mk_nk_mn_instance.cpp; - device_gemm_dl_f16_f16_f16_km_kn_mn_instance.cpp; - device_gemm_dl_f16_f16_f16_km_nk_mn_instance.cpp; - device_gemm_dl_i8_i8_i8_mk_kn_mn_instance.cpp; - device_gemm_dl_i8_i8_i8_mk_nk_mn_instance.cpp; - device_gemm_dl_i8_i8_i8_km_kn_mn_instance.cpp; - device_gemm_dl_i8_i8_i8_km_nk_mn_instance.cpp; +add_instance_library(device_gemm_instance + device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp + device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp + device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp + device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp + device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp + device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp + device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp + device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp + device_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp + device_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp + device_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp + device_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp + device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp + device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp + device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp + device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp + device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp + device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp + device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp + device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp + device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp + device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp + device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp + device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp + device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp + device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp + device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp + device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp + device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp + device_gemm_dl_f32_f32_f32_mk_kn_mn_instance.cpp + device_gemm_dl_f32_f32_f32_mk_nk_mn_instance.cpp + device_gemm_dl_f32_f32_f32_km_kn_mn_instance.cpp + device_gemm_dl_f32_f32_f32_km_nk_mn_instance.cpp + device_gemm_dl_f16_f16_f16_mk_kn_mn_instance.cpp + device_gemm_dl_f16_f16_f16_mk_nk_mn_instance.cpp + device_gemm_dl_f16_f16_f16_km_kn_mn_instance.cpp + device_gemm_dl_f16_f16_f16_km_nk_mn_instance.cpp + device_gemm_dl_i8_i8_i8_mk_kn_mn_instance.cpp + device_gemm_dl_i8_i8_i8_mk_nk_mn_instance.cpp + device_gemm_dl_i8_i8_i8_km_kn_mn_instance.cpp + device_gemm_dl_i8_i8_i8_km_nk_mn_instance.cpp ) - -add_library(device_gemm_instance OBJECT ${DEVICE_GEMM_INSTANCE_SOURCE}) - -target_compile_features(device_gemm_instance PUBLIC) -set_target_properties(device_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt index 194748ba67..bbf81a5fa2 100644 --- a/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt @@ -1,14 +1,6 @@ -# device_gemm_add_add_fastgelu_instance -set(DEVICE_GEMM_ADD_ADD_FASTGELU_INSTANCE_SOURCE - device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp; - device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp; - device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp; - device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp; +add_instance_library(device_gemm_add_add_fastgelu_instance + device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp + device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp + device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp + device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp ) - -add_library(device_gemm_add_add_fastgelu_instance OBJECT ${DEVICE_GEMM_ADD_ADD_FASTGELU_INSTANCE_SOURCE}) - -target_compile_features(device_gemm_add_add_fastgelu_instance PUBLIC) -set_target_properties(device_gemm_add_add_fastgelu_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) - -clang_tidy_check(device_gemm_add_add_fastgelu_instance) diff --git a/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt index 85a7f3f061..ccada3a85e 100644 --- a/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt @@ -1,13 +1,6 @@ -set(DEVICE_GEMM_BIAS_ADD_REDUCE_INSTANCE_SOURCE +add_instance_library(device_gemm_bias_add_reduce_instance device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp ) - -add_library(device_gemm_bias_add_reduce_instance OBJECT ${DEVICE_GEMM_BIAS_ADD_REDUCE_INSTANCE_SOURCE}) - -target_compile_features(device_gemm_bias_add_reduce_instance PUBLIC) -set_target_properties(device_gemm_bias_add_reduce_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) - -clang_tidy_check(device_gemm_bias_add_reduce_instance) diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt index 6bbebb7576..cb1b3a486f 100644 --- a/library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt @@ -1,12 +1,6 @@ -# device_gemm_bilinear_instance -set(DEVICE_GEMM_BILINEAR_INSTANCE_SOURCE - device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp; - device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp; - device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp; - device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp; +add_instance_library(device_gemm_bilinear_instance + device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp + device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp + device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp + device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp ) - -add_library(device_gemm_bilinear_instance OBJECT ${DEVICE_GEMM_BILINEAR_INSTANCE_SOURCE}) -set_target_properties(device_gemm_bilinear_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) - -clang_tidy_check(device_gemm_bilinear_instance) diff --git a/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt index 5fbdc28d7b..2b2cf8c774 100644 --- a/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt @@ -1,10 +1,6 @@ -set(DEVICE_GEMM_REDUCE_INSTANCE_SOURCE +add_instance_library(device_gemm_reduce_instance device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp ) - -add_instance_library(device_gemm_reduce_instance ${DEVICE_GEMM_REDUCE_INSTANCE_SOURCE}) -rocm_install(TARGETS device_gemm_reduce_instance) -clang_tidy_check(device_gemm_reduce_instance) diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_splitk/CMakeLists.txt index 3700ddf19d..6b33622746 100644 --- a/library/src/tensor_operation_instance/gpu/gemm_splitk/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/gemm_splitk/CMakeLists.txt @@ -1,15 +1,10 @@ -set(DEVICE_GEMM_SPLITK_INSTANCE_SOURCE - device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp; - device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp; - device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp; - device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp; - device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp; - device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp; - device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp; - device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp; +add_instance_library(device_gemm_splitk_instance + device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp + device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp + device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp + device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp + device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp + device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp + device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp + device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp ) - -add_library(device_gemm_splitk_instance OBJECT ${DEVICE_GEMM_SPLITK_INSTANCE_SOURCE}) - -target_compile_features(device_gemm_splitk_instance PUBLIC) -set_target_properties(device_gemm_splitk_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt index 43763f4675..1d90593e37 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt @@ -1,12 +1,6 @@ -# device_grouped_conv1d_fwd_instance -set(DEVICE_GROUPED_CONV1D_FWD_INSTANCE_SOURCE - device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp; - device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp; - device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp; - device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp; +add_instance_library(device_grouped_conv1d_fwd_instance + device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp + device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp + device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp + device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp ) - -add_library(device_grouped_conv1d_fwd_instance OBJECT ${DEVICE_GROUPED_CONV1D_FWD_INSTANCE_SOURCE}) -set_target_properties(device_grouped_conv1d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) - -clang_tidy_check(device_grouped_conv1d_fwd_instance) diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt index cc243385f3..0d2d7f846a 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt @@ -1,15 +1,9 @@ -# device_grouped_conv2d_fwd_instance -set(DEVICE_GROUPED_CONV2D_FWD_INSTANCE_SOURCE +add_instance_library(device_grouped_conv2d_fwd_instance # GNHWC, GKYXC, GNHWK - device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp; - device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp; - device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp; - device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_int8_instance.cpp; + device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp + device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp + device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp + device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_int8_instance.cpp # NHWGC, GKYXC, NHWGK - device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp; + device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp ) - -add_library(device_grouped_conv2d_fwd_instance OBJECT ${DEVICE_GROUPED_CONV2D_FWD_INSTANCE_SOURCE}) -set_target_properties(device_grouped_conv2d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) - -clang_tidy_check(device_grouped_conv2d_fwd_instance) diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt index ab7f60bf7f..5dc20332e8 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt @@ -1,12 +1,6 @@ -# device_grouped_conv3d_fwd_instance -set(DEVICE_GROUPED_CONV3D_FWD_INSTANCE_SOURCE - device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp; - device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp; - device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp; - device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp; +add_library(device_grouped_conv3d_fwd_instance + device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp + device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp + device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp + device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp ) - -add_library(device_grouped_conv3d_fwd_instance OBJECT ${DEVICE_GROUPED_CONV3D_FWD_INSTANCE_SOURCE}) -set_target_properties(device_grouped_conv3d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) - -clang_tidy_check(device_grouped_conv3d_fwd_instance) diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt index 4d1115ceb6..82beb2ace2 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt @@ -1,15 +1,6 @@ -# device_grouped_gemm_instance -set(DEVICE_GROUPED_GEMM_INSTANCE_SOURCE - device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp; - device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp; - device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp; - device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp; +add_instance_library(device_grouped_gemm_instance + device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp + device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp + device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp + device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp ) - -add_library(device_grouped_gemm_instance OBJECT ${DEVICE_GROUPED_GEMM_INSTANCE_SOURCE}) - -target_compile_features(device_grouped_gemm_instance PUBLIC) -set_target_properties(device_grouped_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) -rocm_install(TARGETS device_grouped_gemm_instance) - -clang_tidy_check(device_grouped_gemm_instance) diff --git a/library/src/tensor_operation_instance/gpu/normalization/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/normalization/CMakeLists.txt index a38539dcb7..17159fc9e4 100644 --- a/library/src/tensor_operation_instance/gpu/normalization/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/normalization/CMakeLists.txt @@ -1,12 +1,6 @@ -# device_normalization_instance -set(DEVICE_NORMALIZATION_INSTANCE_SOURCE +add_instance_library(device_normalization_instance device_layernorm_f16_instance.cpp device_layernorm_f32_instance.cpp device_softmax_f32_f32_instance.cpp device_softmax_f16_f16_instance.cpp ) - -add_library(device_normalization_instance OBJECT ${DEVICE_NORMALIZATION_INSTANCE_SOURCE}) -set_target_properties(device_normalization_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) - -clang_tidy_check(device_normalization_instance) diff --git a/library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt index d566796c13..4eddd6b644 100644 --- a/library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt @@ -1,29 +1,23 @@ -# device_reduce_instance -set(DEVICE_REDUCE_INSTANCE_SOURCE - device_reduce_instance_blockwise_f16_f16_f16.cpp; - device_reduce_instance_blockwise_f16_f32_f16.cpp; - device_reduce_instance_blockwise_f32_f32_f32.cpp; - device_reduce_instance_blockwise_f32_f64_f32.cpp; - device_reduce_instance_blockwise_f64_f64_f64.cpp; - device_reduce_instance_blockwise_i8_i32_i8.cpp; - device_reduce_instance_blockwise_i8_i8_i8.cpp; - device_reduce_instance_blockwise_b16_f32_b16.cpp; - device_reduce_instance_threadwise_f16_f16_f16.cpp; - device_reduce_instance_threadwise_f16_f32_f16.cpp; - device_reduce_instance_threadwise_f32_f32_f32.cpp; - device_reduce_instance_threadwise_f32_f64_f32.cpp; - device_reduce_instance_threadwise_f64_f64_f64.cpp; - device_reduce_instance_threadwise_i8_i32_i8.cpp; - device_reduce_instance_threadwise_i8_i8_i8.cpp; - device_reduce_instance_threadwise_b16_f32_b16.cpp; - device_reduce_instance_multiblock_atomic_add_f16_f32_f32.cpp; - device_reduce_instance_multiblock_atomic_add_f32_f32_f32.cpp; - device_reduce_instance_multiblock_atomic_add_f32_f64_f32.cpp; - device_reduce_instance_multiblock_atomic_add_f64_f64_f64.cpp; - device_reduce_instance_multiblock_atomic_add_b16_f32_f32.cpp; +add_instance_library(device_reduce_instance + device_reduce_instance_blockwise_f16_f16_f16.cpp + device_reduce_instance_blockwise_f16_f32_f16.cpp + device_reduce_instance_blockwise_f32_f32_f32.cpp + device_reduce_instance_blockwise_f32_f64_f32.cpp + device_reduce_instance_blockwise_f64_f64_f64.cpp + device_reduce_instance_blockwise_i8_i32_i8.cpp + device_reduce_instance_blockwise_i8_i8_i8.cpp + device_reduce_instance_blockwise_b16_f32_b16.cpp + device_reduce_instance_threadwise_f16_f16_f16.cpp + device_reduce_instance_threadwise_f16_f32_f16.cpp + device_reduce_instance_threadwise_f32_f32_f32.cpp + device_reduce_instance_threadwise_f32_f64_f32.cpp + device_reduce_instance_threadwise_f64_f64_f64.cpp + device_reduce_instance_threadwise_i8_i32_i8.cpp + device_reduce_instance_threadwise_i8_i8_i8.cpp + device_reduce_instance_threadwise_b16_f32_b16.cpp + device_reduce_instance_multiblock_atomic_add_f16_f32_f32.cpp + device_reduce_instance_multiblock_atomic_add_f32_f32_f32.cpp + device_reduce_instance_multiblock_atomic_add_f32_f64_f32.cpp + device_reduce_instance_multiblock_atomic_add_f64_f64_f64.cpp + device_reduce_instance_multiblock_atomic_add_b16_f32_f32.cpp ) - -add_library(device_reduce_instance OBJECT ${DEVICE_REDUCE_INSTANCE_SOURCE}) -set_target_properties(device_reduce_instance PROPERTIES POSITION_INDEPENDENT_CODE ON) - -clang_tidy_check(device_reduce_instance) diff --git a/library/src/utility/device_memory.cpp b/library/src/utility/device_memory.cpp index 99d5248706..90f943313b 100644 --- a/library/src/utility/device_memory.cpp +++ b/library/src/utility/device_memory.cpp @@ -10,20 +10,20 @@ DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size) hip_check_error(hipMalloc(static_cast(&mpDeviceBuf), mMemSize)); } -void* DeviceMem::GetDeviceBuffer() { return mpDeviceBuf; } +void* DeviceMem::GetDeviceBuffer() const { return mpDeviceBuf; } -std::size_t DeviceMem::GetBufferSize() { return mMemSize; } +std::size_t DeviceMem::GetBufferSize() const { return mMemSize; } -void DeviceMem::ToDevice(const void* p) +void DeviceMem::ToDevice(const void* p) const { hip_check_error(hipMemcpy(mpDeviceBuf, const_cast(p), mMemSize, hipMemcpyHostToDevice)); } -void DeviceMem::FromDevice(void* p) +void DeviceMem::FromDevice(void* p) const { hip_check_error(hipMemcpy(p, mpDeviceBuf, mMemSize, hipMemcpyDeviceToHost)); } -void DeviceMem::SetZero() { hip_check_error(hipMemset(mpDeviceBuf, 0, mMemSize)); } +void DeviceMem::SetZero() const { hip_check_error(hipMemset(mpDeviceBuf, 0, mMemSize)); } DeviceMem::~DeviceMem() { hip_check_error(hipFree(mpDeviceBuf)); }