From 2929833ec7d60a945179a63b7132c9d39b5d5bb3 Mon Sep 17 00:00:00 2001 From: Illia Silin <98187287+illsilin@users.noreply.github.com> Date: Tue, 2 Dec 2025 11:33:33 -0800 Subject: [PATCH] Disable gemm_blockscale_f8 on gfx90a by default. (#3338) * disable gemm_blockscale_f8 instances on gfx90a by default * fix cmake logic, disable some cmake output * fix cmake logic [ROCm/composable_kernel commit: 2c284a1780acb790f7c52fb94c99694fa4e3f1fe] --- cmake/ShardInstantiation.cmake | 2 +- .../gpu/CMakeLists.txt | 42 ++++++------------- .../gpu/gemm_blockscale_wp/CMakeLists.txt | 42 ++++++++++--------- profiler/src/CMakeLists.txt | 14 +++---- test/ck_tile/gemm_tile_engine/CMakeLists.txt | 6 +-- 5 files changed, 45 insertions(+), 61 deletions(-) diff --git a/cmake/ShardInstantiation.cmake b/cmake/ShardInstantiation.cmake index 48ad21d3e9..b370bb080f 100644 --- a/cmake/ShardInstantiation.cmake +++ b/cmake/ShardInstantiation.cmake @@ -35,7 +35,7 @@ function(generate_sharded_instantiations) set(GENERATED_SOURCE_FILES "") set(EXTERN_TEMPLATE_STATEMENTS "") set(CALL_STATEMENTS "") - message(STATUS "Generating sharded instantiations for target: ${GEN_SHARDED_INSTANCES_NAME}") + message(DEBUG "Generating sharded instantiations for target: ${GEN_SHARDED_INSTANCES_NAME}") set(INSTANCES "${GEN_SHARDED_INSTANCES_NAME}") diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt index 7c64fa7850..eeaf269394 100644 --- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt @@ -82,23 +82,27 @@ function(add_instance_library INSTANCE_NAME) message(DEBUG "removing gemm_multiply_multiply_f8 instance ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() - if(NOT INST_TARGETS MATCHES "gfx94" AND NOT INST_TARGETS MATCHES "gfx95" AND NOT INST_TARGETS MATCHES "gfx12" AND source_name MATCHES "gemm_xdl_universal" AND source_name MATCHES "_f8_") + if(NOT 
INST_TARGETS MATCHES "gfx94|gfx95|gfx12" AND source_name MATCHES "gemm_xdl_universal" AND source_name MATCHES "_f8_") message(DEBUG "removing gemm_universal_f8 instance ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() + if(NOT INST_TARGETS MATCHES "gfx94|gfx95|gfx12" AND source_name MATCHES "gemm_blockscale" AND source_name MATCHES "_f8_") + message(DEBUG "removing gemm_blockscale_f8 instance ${source} ") + list(REMOVE_ITEM ARGN "${source}") + endif() endif() # Do not build WMMA gemm_universal_f8 for any targets except gfx12+ if((NOT INST_TARGETS MATCHES "gfx12" OR FORCE_DISABLE_WMMA) AND source_name MATCHES "gemm_wmma_universal" AND source_name MATCHES "_f8_") - message(DEBUG "removing gemm_universal_f8 instance ${source} ") + message(DEBUG "removing gemm_wmma_universal_f8 instance ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() # Do not build gemm_universal_preshuffle_f8 for any targets except gfx94, gfx95 and gfx12 - if(NOT (INST_TARGETS MATCHES "gfx942" OR INST_TARGETS MATCHES "gfx950" OR INST_TARGETS MATCHES "gfx12") AND (source_name MATCHES "gemm_universal_preshuffle" OR source_name MATCHES "gemm_xdl_universal_preshuffle") AND (source_name MATCHES "_f8_f8_f16" OR source_name MATCHES "_f8_f8_bf16")) + if(NOT (INST_TARGETS MATCHES "gfx94|gfx95|gfx12") AND (source_name MATCHES "gemm_universal_preshuffle") AND source_name MATCHES "_f8_" ) message(DEBUG "removing gemm_universal_preshuffle_f8 instance ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() # Only build tf32 instances for gfx942 & gfx950 - if(NOT (INST_TARGETS MATCHES "gfx942" OR INST_TARGETS MATCHES "gfx950") AND source_name MATCHES "_tf32_") + if(NOT (INST_TARGETS MATCHES "gfx942|gfx950") AND source_name MATCHES "_tf32_") message(DEBUG "removing tf32 instance ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() @@ -127,29 +131,11 @@ function(add_instance_library INSTANCE_NAME) #only build the fp8 gemm instances for gfx90a if the build argument is set, otherwise only build for 
gfx942/gfx950 and gfx1200/gfx1201 if(NOT CK_USE_FP8_ON_UNSUPPORTED_ARCH) - if(source_name MATCHES "gemm_xdl_universal" AND source_name MATCHES "f8") - list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack- gfx908:xnack+ gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1153 gfx10-3-generic gfx11-generic) - endif() - if(source_name MATCHES "gemm_multiply_multiply" AND source_name MATCHES "f8") - list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack- gfx908:xnack+ gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1153 gfx10-3-generic gfx11-generic) - endif() - if(source_name MATCHES "gemm_universal_preshuffle" AND source_name MATCHES "f8") - list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack- gfx908:xnack+ gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1153 gfx10-3-generic gfx11-generic) - endif() - if(source_name MATCHES "gemm_xdl_universal_preshuffle" AND source_name MATCHES "f8") + if(source_name MATCHES "gemm_xdl_universal|gemm_multiply_multiply|gemm_universal_preshuffle|gemm_blockscale" AND source_name MATCHES "_f8_") list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack- gfx908:xnack+ gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1153 gfx10-3-generic gfx11-generic) endif() else() - if(source_name MATCHES "gemm_xdl_universal" AND source_name MATCHES "f8") - list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack- gfx908:xnack+ gfx908 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1153 gfx10-3-generic gfx11-generic) - endif() - if(source_name MATCHES "gemm_multiply_multiply" AND source_name MATCHES "f8") - list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack- gfx908:xnack+ gfx908 gfx1030 gfx1100 gfx1101 
gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1153 gfx10-3-generic gfx11-generic) - endif() - if(source_name MATCHES "gemm_universal_preshuffle" AND source_name MATCHES "f8") - list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack- gfx908:xnack+ gfx908 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1153 gfx10-3-generic gfx11-generic) - endif() - if(source_name MATCHES "gemm_xdl_universal_preshuffle" AND source_name MATCHES "f8") + if(source_name MATCHES "gemm_xdl_universal|gemm_multiply_multiply|gemm_universal_preshuffle|gemm_blockscale" AND source_name MATCHES "_f8_") list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack- gfx908:xnack+ gfx908 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1153 gfx10-3-generic gfx11-generic) endif() endif() @@ -305,12 +291,8 @@ FOREACH(subdir_path ${dir_list}) message(DEBUG "Found gemm_multiply_multiply instances, but gfx94/gfx95/gfx11/gfx12 not on the target list. Skipping. ${cmake_instance}") set(add_inst 0) endif() - if(("${cmake_instance}" MATCHES "gemm_universal_preshuffle" AND "${cmake_instance}" MATCHES "_f8_" ) AND (NOT INST_TARGETS MATCHES "gfx94|gfx95|gfx12") AND (NOT CK_USE_FP8_ON_UNSUPPORTED_ARCH)) - message(DEBUG "Found gemm_universal_preshuffle_f8 instances, but gfx94/gfx95 not on the target list. Skipping.") - set(add_inst 0) - endif() - if(("${cmake_instance}" MATCHES "gemm_xdl_universal_preshuffle" AND "${cmake_instance}" MATCHES "_f8_" ) AND (NOT INST_TARGETS MATCHES "gfx94|gfx95|gfx12") AND (NOT CK_USE_FP8_ON_UNSUPPORTED_ARCH)) - message(DEBUG "Found gemm_xdl_universal_preshuffle_f8_f8_bf16 instances, but gfx94/gfx95 not on the target list. Skipping.") + if(("${cmake_instance}" MATCHES "gemm_universal_preshuffle|gemm_blockscale" AND "${cmake_instance}" MATCHES "_f8_" ) AND (NOT INST_TARGETS MATCHES "gfx94|gfx95|gfx12") AND (NOT CK_USE_FP8_ON_UNSUPPORTED_ARCH)) + message(DEBUG "Found gemm_f8 instances, but gfx94/gfx95 not on the target list. 
Skipping.") set(add_inst 0) endif() if ("${cmake_instance}" MATCHES "gemm_bilinear") diff --git a/library/src/tensor_operation_instance/gpu/gemm_blockscale_wp/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_blockscale_wp/CMakeLists.txt index ff9b4ddece..b37a22d895 100644 --- a/library/src/tensor_operation_instance/gpu/gemm_blockscale_wp/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/gemm_blockscale_wp/CMakeLists.txt @@ -2,25 +2,27 @@ # SPDX-License-Identifier: MIT # ONLY XDL_KERNELS -set(GEMM_BLOCKSCALE_WP_INSTANCES) +if(SUPPORTED_GPU_TARGETS MATCHES "gfx9[45]|gfx12") + set(GEMM_BLOCKSCALE_WP_INSTANCES) -list(APPEND GEMM_BLOCKSCALE_WP_INSTANCES - device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_default_instance.cpp - device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_kpadding_instance.cpp - device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_default_instance.cpp - device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_kpadding_instance.cpp - ) -check_cxx_compiler_flag("-mllvm --misched-bottomup=1" HAS_MISCHED_BOTTOMUP) -check_cxx_compiler_flag("-mllvm --misched-prera-direction=bottomup" HAS_MISCHED_PRERA_DIRECTION) -if(HAS_MISCHED_BOTTOMUP) - set_source_files_properties(device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_default_instance.cpp PROPERTIES COMPILE_OPTIONS ";-mllvm;-greedy-reverse-local-assignment=1;-mllvm;--misched-bottomup=1") - set_source_files_properties(device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_kpadding_instance.cpp PROPERTIES COMPILE_OPTIONS ";-mllvm;-greedy-reverse-local-assignment=1;-mllvm;--misched-bottomup=1") - 
set_source_files_properties(device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_default_instance.cpp PROPERTIES COMPILE_OPTIONS ";-mllvm;-greedy-reverse-local-assignment=1;-mllvm;--misched-bottomup=1") - set_source_files_properties(device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_kpadding_instance.cpp PROPERTIES COMPILE_OPTIONS ";-mllvm;-greedy-reverse-local-assignment=1;-mllvm;--misched-bottomup=1") -elseif(HAS_MISCHED_PRERA_DIRECTION) - set_source_files_properties(device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_default_instance.cpp PROPERTIES COMPILE_OPTIONS ";-mllvm;-greedy-reverse-local-assignment=1;-mllvm;--misched-prera-direction=bottomup") - set_source_files_properties(device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_kpadding_instance.cpp PROPERTIES COMPILE_OPTIONS ";-mllvm;-greedy-reverse-local-assignment=1;-mllvm;--misched-prera-direction=bottomup") - set_source_files_properties(device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_default_instance.cpp PROPERTIES COMPILE_OPTIONS ";-mllvm;-greedy-reverse-local-assignment=1;-mllvm;--misched-prera-direction=bottomup") - set_source_files_properties(device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_kpadding_instance.cpp PROPERTIES COMPILE_OPTIONS ";-mllvm;-greedy-reverse-local-assignment=1;-mllvm;--misched-prera-direction=bottomup") + list(APPEND GEMM_BLOCKSCALE_WP_INSTANCES + device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_default_instance.cpp + device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_kpadding_instance.cpp + 
device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_default_instance.cpp + device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_kpadding_instance.cpp + ) + check_cxx_compiler_flag("-mllvm --misched-bottomup=1" HAS_MISCHED_BOTTOMUP) + check_cxx_compiler_flag("-mllvm --misched-prera-direction=bottomup" HAS_MISCHED_PRERA_DIRECTION) + if(HAS_MISCHED_BOTTOMUP) + set_source_files_properties(device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_default_instance.cpp PROPERTIES COMPILE_OPTIONS ";-mllvm;-greedy-reverse-local-assignment=1;-mllvm;--misched-bottomup=1") + set_source_files_properties(device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_kpadding_instance.cpp PROPERTIES COMPILE_OPTIONS ";-mllvm;-greedy-reverse-local-assignment=1;-mllvm;--misched-bottomup=1") + set_source_files_properties(device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_default_instance.cpp PROPERTIES COMPILE_OPTIONS ";-mllvm;-greedy-reverse-local-assignment=1;-mllvm;--misched-bottomup=1") + set_source_files_properties(device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_kpadding_instance.cpp PROPERTIES COMPILE_OPTIONS ";-mllvm;-greedy-reverse-local-assignment=1;-mllvm;--misched-bottomup=1") + elseif(HAS_MISCHED_PRERA_DIRECTION) + set_source_files_properties(device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_default_instance.cpp PROPERTIES COMPILE_OPTIONS ";-mllvm;-greedy-reverse-local-assignment=1;-mllvm;--misched-prera-direction=bottomup") + set_source_files_properties(device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_comp_kpadding_instance.cpp 
PROPERTIES COMPILE_OPTIONS ";-mllvm;-greedy-reverse-local-assignment=1;-mllvm;--misched-prera-direction=bottomup") + set_source_files_properties(device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_default_instance.cpp PROPERTIES COMPILE_OPTIONS ";-mllvm;-greedy-reverse-local-assignment=1;-mllvm;--misched-prera-direction=bottomup") + set_source_files_properties(device_gemm_blockscale_wp_xdl_f8_f8_bf16/device_gemm_blockscale_wp_xdl_f8_f8_bf16_mk_nk_mn_128_128_128_mem_v1_kpadding_instance.cpp PROPERTIES COMPILE_OPTIONS ";-mllvm;-greedy-reverse-local-assignment=1;-mllvm;--misched-prera-direction=bottomup") + endif() + add_instance_library(device_gemm_blockscale_wp_instance ${GEMM_BLOCKSCALE_WP_INSTANCES}) endif() -add_instance_library(device_gemm_blockscale_wp_instance ${GEMM_BLOCKSCALE_WP_INSTANCES}) diff --git a/profiler/src/CMakeLists.txt b/profiler/src/CMakeLists.txt index d169db3f0b..71f1637653 100644 --- a/profiler/src/CMakeLists.txt +++ b/profiler/src/CMakeLists.txt @@ -45,7 +45,7 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx9") endif() endif() -if(SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR SUPPORTED_GPU_TARGETS MATCHES "gfx1[12]") +if(SUPPORTED_GPU_TARGETS MATCHES "gfx9|gfx1[12]") if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES) list(APPEND PROFILER_OPS profile_gemm_reduce.cpp) list(APPEND PROFILER_OPS profile_batched_gemm_add_relu_gemm_add.cpp) @@ -59,7 +59,7 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR SUPPORTED_GPU_TARGETS MATCHES "gfx1[1 list(APPEND PROFILER_OPS profile_grouped_gemm_tile_loop.cpp) list(APPEND PROFILER_OPS profile_grouped_gemm_multiply_tile_loop.cpp) endif() - if(SUPPORTED_GPU_TARGETS MATCHES "gfx9[45]" OR SUPPORTED_GPU_TARGETS MATCHES "gfx12") + if(SUPPORTED_GPU_TARGETS MATCHES "gfx9[45]|gfx12") list(APPEND PROFILER_OPS profile_gemm_multiply_multiply_wp.cpp) list(APPEND PROFILER_OPS profile_gemm_ab_scale.cpp) list(APPEND PROFILER_OPS profile_gemm_blockscale_wp.cpp) @@ -90,7 +90,7 @@ 
if(SUPPORTED_GPU_TARGETS MATCHES "gfx(9[45]|1[12])") list(APPEND PROFILER_OPS profile_gemm_multiply_multiply.cpp) endif() -if(SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR SUPPORTED_GPU_TARGETS MATCHES "gfx1[12]") +if(SUPPORTED_GPU_TARGETS MATCHES "gfx9|gfx1[12]") list(APPEND PROFILER_OPS profile_gemm_universal.cpp) list(APPEND PROFILER_OPS profile_batched_gemm.cpp) list(APPEND PROFILER_OPS profile_batched_gemm_b_scale.cpp) @@ -164,7 +164,7 @@ list(APPEND DEVICE_INSTANCES device_column_to_image_instance) list(APPEND DEVICE_INSTANCES device_transpose_instance) list(APPEND DEVICE_INSTANCES device_permute_scale_instance) -if(SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR SUPPORTED_GPU_TARGETS MATCHES "gfx1[12]") +if(SUPPORTED_GPU_TARGETS MATCHES "gfx9|gfx1[12]") if(DTYPES MATCHES "fp32" OR DTYPES MATCHES "fp64" OR NOT DEFINED DTYPES) list(APPEND DEVICE_INSTANCES device_contraction_bilinear_instance) list(APPEND DEVICE_INSTANCES device_contraction_scale_instance) @@ -184,11 +184,11 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR SUPPORTED_GPU_TARGETS MATCHES "gfx1[1 list(APPEND DEVICE_INSTANCES device_grouped_gemm_tile_loop_instance) endif() list(APPEND DEVICE_INSTANCES device_batched_gemm_reduce_instance) - if(SUPPORTED_GPU_TARGETS MATCHES "gfx9[45]" OR SUPPORTED_GPU_TARGETS MATCHES "gfx12") + if(SUPPORTED_GPU_TARGETS MATCHES "gfx9[45]|gfx12") list(APPEND DEVICE_INSTANCES device_gemm_multiply_multiply_wp_instance) list(APPEND DEVICE_INSTANCES device_gemm_universal_preshuffle_instance) endif() - if(SUPPORTED_GPU_TARGETS MATCHES "gfx9[45]" OR SUPPORTED_GPU_TARGETS MATCHES "gfx1[12]") + if(SUPPORTED_GPU_TARGETS MATCHES "gfx9[45]|gfx1[12]") list(APPEND DEVICE_INSTANCES device_gemm_ab_scale_instance) list(APPEND DEVICE_INSTANCES device_gemm_blockscale_wp_instance) endif() @@ -228,7 +228,7 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx(9[45]|1[12])") list(APPEND DEVICE_INSTANCES device_gemm_multiply_multiply_instance) endif() -if(SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR SUPPORTED_GPU_TARGETS 
MATCHES "gfx1[12]") +if(SUPPORTED_GPU_TARGETS MATCHES "gfx9|gfx1[12]") list(APPEND DEVICE_INSTANCES device_gemm_universal_instance) list(APPEND DEVICE_INSTANCES device_batched_gemm_instance) list(APPEND DEVICE_INSTANCES device_gemm_b_scale_instance) diff --git a/test/ck_tile/gemm_tile_engine/CMakeLists.txt b/test/ck_tile/gemm_tile_engine/CMakeLists.txt index f68b446e21..33effcc120 100644 --- a/test/ck_tile/gemm_tile_engine/CMakeLists.txt +++ b/test/ck_tile/gemm_tile_engine/CMakeLists.txt @@ -87,7 +87,7 @@ function(create_individual_gemm_test_target datatype layout config_name trait ti target_compile_options(${target_name} PRIVATE -DCK_TILE_USE_OCP_FP8) endif() - message(STATUS " Created test target: ${target_name}") + message(DEBUG " Created test target: ${target_name}") endfunction() # ============================================================================ @@ -138,11 +138,11 @@ function(build_gemm_test_targets datatype layout config_name) # Verify kernel list file was generated if(NOT EXISTS ${working_path}/gemm_kernel_list.txt) - message(STATUS "No kernels found for ${datatype}_${layout}_${config_name} (validation filtered out all combinations)") + message(DEBUG "No kernels found for ${datatype}_${layout}_${config_name} (validation filtered out all combinations)") return() endif() - message(STATUS "Building tests for ${datatype}_${layout}_${config_name}") + message(DEBUG "Building tests for ${datatype}_${layout}_${config_name}") # STEP 2a: Extract test parameters from config set(test_params_file "${working_path}/test_params.hpp")