mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-19 04:19:36 +00:00
Split the instances by architecture. (#1223)
* parse examples inside the add_example_executable function * fix the example 64 cmake file * add xdl flag to the gemm_bias_softmax_gemm_permute example * add filtering of tests based on architecture type * enable test_grouped_gemm for gfx9 only * enable test_transpose only for gfx9 * only linnk test_transpose if it gets built * split the gemm instances by architectures * split gemm_bilinear,grouped_conv_bwd_weight instances by targets * split instances by architecture * split grouped_conv instances by architecture * fix clang format * fix the if-else logic in group_conv headers * small fix for grouped convolution instances * fix the grouped conv bwd weight dl instances * fix client examples * only enable client examples 3 and 4 on gfx9 * set the gfx9 macro * make sure the architecture macros are set by cmake * use separate set of xdl/wmma flags for host code * sinmplify the main cmake file * add conv_fwd_bf8 instance declaration
This commit is contained in:
@@ -36,12 +36,27 @@ function(add_instance_library INSTANCE_NAME)
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
# Do not build DL instances if DL_KERNELS macro is not set
|
||||
foreach(source IN LISTS ARGN)
|
||||
if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl")
|
||||
message("removing dl instance ${source} ")
|
||||
list(REMOVE_ITEM ARGN "${source}")
|
||||
endif()
|
||||
endforeach()
|
||||
# Do not build XDL instances if gfx9 targets are not on the target list
|
||||
foreach(source IN LISTS ARGN)
|
||||
if(NOT GPU_TARGETS MATCHES "gfx9" AND source MATCHES "_xdl")
|
||||
message("removing xdl instance ${source} ")
|
||||
list(REMOVE_ITEM ARGN "${source}")
|
||||
endif()
|
||||
endforeach()
|
||||
# Do not build WMMA instances if gfx11 targets are not on the target list
|
||||
foreach(source IN LISTS ARGN)
|
||||
if(NOT GPU_TARGETS MATCHES "gfx11" AND source MATCHES "_wmma")
|
||||
message("removing wmma instance ${source} ")
|
||||
list(REMOVE_ITEM ARGN "${source}")
|
||||
endif()
|
||||
endforeach()
|
||||
#only continue if there are some source files left on the list
|
||||
if(ARGN)
|
||||
add_library(${INSTANCE_NAME} OBJECT ${ARGN})
|
||||
@@ -124,6 +139,26 @@ FOREACH(subdir_path ${dir_list})
|
||||
message("Found only dl instances, but DL_KERNELS is not set. Skipping.")
|
||||
set(add_inst 0)
|
||||
endif()
|
||||
if(("${cmake_instance}" MATCHES "ONLY XDL_KERNELS") AND (NOT GPU_TARGETS MATCHES "gfx9"))
|
||||
message("Found only xdl instances, but gfx9 is not on the targets list. Skipping.")
|
||||
set(add_inst 0)
|
||||
endif()
|
||||
if(("${cmake_instance}" MATCHES "ONLY WMMA_KERNELS") AND (NOT GPU_TARGETS MATCHES "gfx11"))
|
||||
message("Found only wmma instances, but gfx11 is not on the targets list. Skipping.")
|
||||
set(add_inst 0)
|
||||
endif()
|
||||
if(("${cmake_instance}" MATCHES "ONLY XDL_AND_DL_KERNELS") AND (NOT DEFINED DL_KERNELS) AND (NOT GPU_TARGETS MATCHES "gfx9"))
|
||||
message("Found only xdl and dl instances, but gfx9 is not on the targets listand DL_KERNELS is not set. Skipping.")
|
||||
set(add_inst 0)
|
||||
endif()
|
||||
if(("${cmake_instance}" MATCHES "ONLY XDL_AND_WMMA_KERNELS") AND (NOT GPU_TARGETS MATCHES "gfx11") AND (NOT GPU_TARGETS MATCHES "gfx9"))
|
||||
message("Found only xdl and wmma instances, but gfx11 and gfx9 are not on the targets list. Skipping.")
|
||||
set(add_inst 0)
|
||||
endif()
|
||||
if(("${cmake_instance}" MATCHES "XDL_DL_WMMA_KERNELS") AND (NOT GPU_TARGETS MATCHES "gfx11") AND (NOT GPU_TARGETS MATCHES "gfx9") AND (NOT DEFINED DL_KERNELS))
|
||||
message("Found xdl, dl, and wmma instances, but none of those meet the target list. Skipping.")
|
||||
set(add_inst 0)
|
||||
endif()
|
||||
if((add_inst EQUAL 1))
|
||||
get_filename_component(target_dir ${subdir_path} NAME)
|
||||
add_subdirectory(${target_dir})
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
set(BATCHED_GEMM_INSTANCES)
|
||||
list(APPEND BATCHED_GEMM_INSTANCES device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp
|
||||
device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_batched_gemm_add_relu_gemm_add_instance
|
||||
device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp
|
||||
device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_batched_gemm_bias_permute_instance
|
||||
device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp
|
||||
)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_batched_gemm_gemm_instance
|
||||
device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp
|
||||
device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_batched_gemm_reduce_instance
|
||||
device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp
|
||||
device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_batched_gemm_softmax_gemm_instance
|
||||
device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp
|
||||
)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
set(DEVICE_BATCHED_GEMM_SOFTMAX_GEMM_PERMUTE_INSTANCES)
|
||||
list(APPEND DEVICE_BATCHED_GEMM_SOFTMAX_GEMM_PERMUTE_INSTANCES
|
||||
device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
set(DEVICE_CONTRACTION_BILINEAR_INSTANCES)
|
||||
|
||||
# FP32
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
set(DEVICE_CONTRACTION_SCALE_INSTANCES)
|
||||
|
||||
# FP32
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_conv1d_bwd_data_instance
|
||||
device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp
|
||||
device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_AND_DL_KERNELS
|
||||
set(CONV2D_BWD_DATA_INSTANCES)
|
||||
list(APPEND CONV2D_BWD_DATA_INSTANCES device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp
|
||||
device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f32_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
set(DEVICE_CONV2D_FWD_INSTANCES)
|
||||
list(APPEND DEVICE_CONV2D_FWD_INSTANCES device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp
|
||||
device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_conv2d_fwd_bias_relu_instance
|
||||
device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp
|
||||
)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_conv2d_fwd_bias_relu_add_instance
|
||||
device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp
|
||||
)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_conv3d_bwd_data_instance
|
||||
device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp
|
||||
device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_gemm_add_instance
|
||||
device_gemm_add_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp
|
||||
device_gemm_add_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_gemm_add_add_fastgelu_instance
|
||||
device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp
|
||||
device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_gemm_add_fastgelu_instance
|
||||
device_gemm_add_fastgelu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp
|
||||
device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_gemm_add_multiply_instance
|
||||
device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp
|
||||
device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_gemm_add_relu_instance
|
||||
device_gemm_add_relu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp
|
||||
device_gemm_add_relu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_gemm_add_relu_add_layernorm_instance
|
||||
device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp
|
||||
device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_gemm_add_silu_instance
|
||||
device_gemm_add_silu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp
|
||||
device_gemm_add_silu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_gemm_bias_add_reduce_instance
|
||||
device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp
|
||||
device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_AND_WMMA_KERNELS
|
||||
add_instance_library(device_gemm_bilinear_instance
|
||||
device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp
|
||||
device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_gemm_fastgelu_instance
|
||||
device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp
|
||||
device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
set(GEMM_MULTIPLY_ADD_INSTANCES)
|
||||
list(APPEND GEMM_MULTIPLY_ADD_INSTANCES device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp
|
||||
device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_gemm_reduce_instance
|
||||
device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp
|
||||
device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
set(GEMM_SPLITK_INSTANCES)
|
||||
|
||||
list(APPEND GEMM_SPLITK_INSTANCES
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_gemm_streamk_instance
|
||||
# device_gemm_xdl_streamk_f32_f32_f32_mk_kn_mn_instance.cpp
|
||||
# device_gemm_xdl_streamk_f32_f32_f32_mk_nk_mn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_AND_DL_KERNELS
|
||||
set(GROUPED_CONV1D_BWD_WEIGHT
|
||||
xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp
|
||||
xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_grouped_conv1d_fwd_instance
|
||||
xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp
|
||||
xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_AND_WMMA_KERNELS
|
||||
add_instance_library(
|
||||
device_grouped_conv2d_bwd_data_instance
|
||||
xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_AND_DL_KERNELS
|
||||
set(GROUPED_CONV2D_BWD_WEIGHT
|
||||
xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp
|
||||
xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# XDL_DL_WMMA_KERNELS
|
||||
add_instance_library(device_grouped_conv2d_fwd_instance
|
||||
#xdl
|
||||
# GNHWC, GKYXC, GNHWK
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_AND_WMMA_KERNELS
|
||||
set(GROUPED_CONV3D_BWD_DATA
|
||||
xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp
|
||||
xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
set(GROUPED_CONV3D_BWD_DATA_BILINEAR
|
||||
xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
|
||||
xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
set(GROUPED_CONV3D_BWD_DATA_BILINEAR
|
||||
xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
|
||||
xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# XDL_DL_WMMA_KERNELS
|
||||
set(GROUPED_CONV3D_BWD_WEIGHT
|
||||
xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp
|
||||
xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_AND_WMMA_KERNELS
|
||||
set(GROUPED_CONV3D_FWD
|
||||
xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp
|
||||
xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
set(GROUPED_CONV3D_FWD_BILINEAR
|
||||
xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
|
||||
xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
set(GROUPED_CONV3D_FWD_BILINEAR
|
||||
xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
|
||||
xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
set(GROUPED_CONV3D_FWD_SCALEADD_AB
|
||||
xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
|
||||
xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
set(GROUPED_CONV3D_FWD_scaleadd_scaleadd_RELU
|
||||
xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
|
||||
xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_grouped_gemm_instance
|
||||
device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp
|
||||
device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_grouped_gemm_bias_instance
|
||||
device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp
|
||||
device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
add_instance_library(device_grouped_gemm_fastgelu_instance
|
||||
device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp
|
||||
device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_KERNELS
|
||||
set(GROUPED_GEMM_FIXED_NK_INSTANCES)
|
||||
|
||||
list(APPEND GROUPED_GEMM_FIXED_NK_INSTANCES device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# ONLY XDL_AND_DL_KERNELS
|
||||
set(CONV2D_PERLAYER_QUANT_SRC conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp)
|
||||
set(CONV2D_PERCHANNEL_QUANT_SRC conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp)
|
||||
set(CONV2D_BIAS_PERLAYER_QUANT_SRC conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp)
|
||||
|
||||
Reference in New Issue
Block a user