From cdaff7f210f0b9849fb397eda84aedfa3b7f399b Mon Sep 17 00:00:00 2001
From: apoorva
Date: Tue, 24 Jun 2025 20:17:46 +0000
Subject: [PATCH] Added instances to Cmake

Add the bf16 WMMA gemm_add_relu instance source to the
device_gemm_add_relu_instance library, and append
device_gemm_add_relu_instance and device_gemm_add_add_relu_instance to the
profiler's DEVICE_INSTANCES list.
---
Review notes (ignored by `git am`):

* The bf16 WMMA source is now listed in add_instance_library() next to its
  sibling instance files instead of being built with add_executable().
  Presumably the instance translation unit defines no main() - confirm.
* Hunk indentation and blank context lines were reconstructed from a
  whitespace-collapsed copy of this patch, and the `index` hash of the first
  file predates the edit. Regenerate with `git format-patch` before applying.
* device_gemm_add_add_relu_instance is appended to DEVICE_INSTANCES, but no
  library of that name is defined in this patch - verify the target exists.

 .../gpu/gemm_add_relu/CMakeLists.txt                 | 3 ++-
 profiler/src/CMakeLists.txt                          | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_relu/CMakeLists.txt
index 30cbadf3d8..1a4ed3a279 100644
--- a/library/src/tensor_operation_instance/gpu/gemm_add_relu/CMakeLists.txt
+++ b/library/src/tensor_operation_instance/gpu/gemm_add_relu/CMakeLists.txt
@@ -1,4 +1,4 @@
-# ONLY XDL_KERNELS
+# XDL_AND_WMMA KERNELS
 add_instance_library(device_gemm_add_relu_instance
    device_gemm_add_relu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp
    device_gemm_add_relu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp
@@ -6,3 +6,4 @@ add_instance_library(device_gemm_add_relu_instance
 
    device_gemm_add_relu_wmma_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp
+   device_gemm_add_relu_wmma_c_shuffle_bf16_bf16_bf16_bf16_mk_kn_mn_mn_instance.cpp
 )
diff --git a/profiler/src/CMakeLists.txt b/profiler/src/CMakeLists.txt
index 35a4e184a0..abb037484c 100644
--- a/profiler/src/CMakeLists.txt
+++ b/profiler/src/CMakeLists.txt
@@ -190,8 +190,9 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx9")
 endif()
 
 if((SUPPORTED_GPU_TARGETS MATCHES "gfx9" AND (DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)) OR
-   (SUPPORTED_GPU_TARGETS MATCHES "gfx1[12]"))
+   (SUPPORTED_GPU_TARGETS MATCHES "gfx1[12]" ))
     list(APPEND DEVICE_INSTANCES device_gemm_bilinear_instance)
+    list(APPEND DEVICE_INSTANCES device_gemm_add_relu_instance)
 endif()
 
 if(SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR SUPPORTED_GPU_TARGETS MATCHES "gfx1[12]")
@@ -205,6 +206,7 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR SUPPORTED_GPU_TARGETS MATCHES "gfx1[1
     if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
         list(APPEND DEVICE_INSTANCES device_gemm_fastgelu_instance)
         list(APPEND DEVICE_INSTANCES device_gemm_add_add_fastgelu_instance)
+        list(APPEND DEVICE_INSTANCES device_gemm_add_add_relu_instance)
     endif()
 endif()
 