From bbb263ab7ede32c772aed8f0e28e91d331976309 Mon Sep 17 00:00:00 2001 From: Yi DING Date: Thu, 24 Jul 2025 17:38:14 +0800 Subject: [PATCH] Use filename but not path to filter compilation (#2556) [ROCm/composable_kernel commit: 4338346b106c1fe03d4e00be375d4d87052bc46b] --- example/CMakeLists.txt | 167 +++++++++--------- .../gpu/CMakeLists.txt | 95 +++++----- 2 files changed, 122 insertions(+), 140 deletions(-) diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 3c67e9214f..7bd628edf2 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -24,26 +24,27 @@ function(add_example_executable EXAMPLE_NAME FILE_NAME) set(result 1) if(DEFINED DTYPES) foreach(source IN LISTS FILE_NAME) + get_filename_component(source_name ${source} NAME) set(test 0) - if((source MATCHES "_fp16" OR source MATCHES "_f16") AND NOT "fp16" IN_LIST DTYPES) + if((source_name MATCHES "_fp16" OR source_name MATCHES "_f16") AND NOT "fp16" IN_LIST DTYPES) set(test 1) endif() - if((source MATCHES "_fp32" OR source MATCHES "_f32") AND NOT "fp32" IN_LIST DTYPES) + if((source_name MATCHES "_fp32" OR source_name MATCHES "_f32") AND NOT "fp32" IN_LIST DTYPES) set(test 1) endif() - if((source MATCHES "_fp64" OR source MATCHES "_f64") AND NOT "fp64" IN_LIST DTYPES) + if((source_name MATCHES "_fp64" OR source_name MATCHES "_f64") AND NOT "fp64" IN_LIST DTYPES) set(test 1) endif() - if((source MATCHES "_fp8" OR source MATCHES "_f8") AND NOT "fp8" IN_LIST DTYPES) + if((source_name MATCHES "_fp8" OR source_name MATCHES "_f8") AND NOT "fp8" IN_LIST DTYPES) set(test 1) endif() - if((source MATCHES "_bf8" OR source MATCHES "_bf8") AND NOT "bf8" IN_LIST DTYPES) + if((source_name MATCHES "_bf8" OR source_name MATCHES "_bf8") AND NOT "bf8" IN_LIST DTYPES) set(test 1) endif() - if((source MATCHES "_bf16" OR source MATCHES "_b16") AND NOT "bf16" IN_LIST DTYPES) + if((source_name MATCHES "_bf16" OR source_name MATCHES "_b16") AND NOT "bf16" IN_LIST DTYPES) set(test 1) endif() - if((source MATCHES "_int8" OR source MATCHES "_i8") AND NOT "int8" IN_LIST DTYPES) + if((source_name MATCHES "_int8" OR source_name MATCHES "_i8") AND NOT "int8" IN_LIST DTYPES) set(test 1) endif() if(test EQUAL 1) @@ -55,73 +56,65 @@ function(add_example_executable EXAMPLE_NAME FILE_NAME) set(EX_TARGETS ${SUPPORTED_GPU_TARGETS}) - #Do not build any DL examples if DL_KERNELS not set foreach(source IN LISTS FILE_NAME) - if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl") + get_filename_component(source_name ${source} NAME) + #Do not build any DL examples if DL_KERNELS not set + if(NOT DEFINED DL_KERNELS AND source_name MATCHES "_dl") message(DEBUG "removing dl example ${source} ") list(REMOVE_ITEM FILE_NAME "${source}") endif() - endforeach() - #Do not build any DPP examples if DPP_KERNELS not set - foreach(source IN LISTS FILE_NAME) - if(NOT DEFINED DPP_KERNELS AND source MATCHES "_dpp") + #Do not build any DPP examples if DPP_KERNELS not set + if(NOT DEFINED DPP_KERNELS AND source_name MATCHES "_dpp") message(DEBUG "removing dpp example ${source} ") list(REMOVE_ITEM FILE_NAME "${source}") endif() - endforeach() - #Do not build any XDL examples if gfx9 targets are not on the list - foreach(source IN LISTS FILE_NAME) - if(NOT EX_TARGETS MATCHES "gfx9" AND source MATCHES "_xdl") + #Do not build any XDL examples if gfx9 targets are not on the list + if(NOT EX_TARGETS MATCHES "gfx9" AND source_name MATCHES "_xdl") message(DEBUG "removing xdl example ${source} ") list(REMOVE_ITEM FILE_NAME "${source}") endif() - endforeach() - #Do not build any WMMA examples if gfx11 targets are not on the list - foreach(source IN LISTS FILE_NAME) - if(NOT EX_TARGETS MATCHES "gfx11" AND NOT EX_TARGETS MATCHES "gfx12" AND source MATCHES "_wmma") + #Do not build any WMMA examples if gfx11 targets are not on the list + if(NOT EX_TARGETS MATCHES "gfx11" AND NOT EX_TARGETS MATCHES "gfx12" AND source_name MATCHES "_wmma") message(DEBUG "removing wmma example ${source} ") list(REMOVE_ITEM FILE_NAME "${source}") endif() - endforeach() - #Do not build any microscaling examples if gfx950 target is not on the list - foreach(source IN LISTS FILE_NAME) - if(NOT EX_TARGETS MATCHES "gfx950" AND source MATCHES "_mx") + #Do not build any microscaling examples if gfx950 target is not on the list + if(NOT EX_TARGETS MATCHES "gfx950" AND source_name MATCHES "_mx") message(DEBUG "removing microscaling example ${source} ") list(REMOVE_ITEM FILE_NAME "${source}") endif() - endforeach() - #Do not build any FP8 examples if CK_ENABLE_FP8 not set - foreach(source IN LISTS FILE_NAME) - if(NOT DEFINED CK_ENABLE_FP8 AND source MATCHES "_fp8") + #Do not build any FP8 examples if CK_ENABLE_FP8 not set + if(NOT DEFINED CK_ENABLE_FP8 AND source_name MATCHES "_fp8") message(DEBUG "removing fp8 example ${source} ") list(REMOVE_ITEM FILE_NAME "${source}") endif() - endforeach() - #Do not build any BF8 examples if CK_ENABLE_BF8 not set - foreach(source IN LISTS FILE_NAME) - if(NOT DEFINED CK_ENABLE_BF8 AND source MATCHES "_bf8") + #Do not build any BF8 examples if CK_ENABLE_BF8 not set + if(NOT DEFINED CK_ENABLE_BF8 AND source_name MATCHES "_bf8") message(DEBUG "removing bf8 example ${source} ") list(REMOVE_ITEM FILE_NAME "${source}") endif() - endforeach() - # Build fp8 gemm_multiply_multiply and moe only on gfx94/95 - foreach(source IN LISTS FILE_NAME) - if(NOT EX_TARGETS MATCHES "gfx94" AND NOT EX_TARGETS MATCHES "gfx95") - if (source MATCHES "fp8" AND source MATCHES "(gemm_multiply_multiply|moe)") - message(DEBUG "Skipping ${source} example for current target") - list(REMOVE_ITEM FILE_NAME "${source}") + # Build fp8 gemm_multiply_multiply and moe only on gfx94/95 + if(NOT EX_TARGETS MATCHES "gfx94" AND NOT EX_TARGETS MATCHES "gfx95") + if(source_name MATCHES "fp8" AND source_name MATCHES "(gemm_multiply_multiply|moe)") + message(DEBUG "Skipping ${source} example for current target") + list(REMOVE_ITEM FILE_NAME "${source}") + endif() endif() - endif() endforeach() #only continue if there are some source files left on the list + set(source_name_list "") + foreach(source IN LISTS FILE_NAME) + get_filename_component(source_name ${source} NAME) + list(APPEND source_name_list ${source_name}) + endforeach() if(FILE_NAME) - if(FILE_NAME MATCHES "_xdl" AND NOT FILE_NAME MATCHES "_pk_i4") + if(source_name_list MATCHES "_xdl" AND NOT source_name_list MATCHES "_pk_i4") list(REMOVE_ITEM EX_TARGETS gfx900 gfx906 gfx906:xnack- gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1200 gfx1201 gfx10-3-generic gfx11-generic gfx12-generic) - elseif(FILE_NAME MATCHES "_wmma") + elseif(source_name_list MATCHES "_wmma") list(REMOVE_ITEM EX_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack+ gfx908:xnack- gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx942 gfx1030 gfx950) - elseif(FILE_NAME MATCHES "_mx") #only build mx example for gfx950 + elseif(source_name_list MATCHES "_mx") #only build mx example for gfx950 list(REMOVE_ITEM EX_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack+ gfx908:xnack- gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1200 gfx1201 gfx10-3-generic gfx11-generic gfx12-generic) - elseif(FILE_NAME MATCHES "_pk_i4") #only build these examples for gfx942 and gfx950 + elseif(source_name_list MATCHES "_pk_i4") #only build these examples for gfx942 and gfx950 message(DEBUG "trimming targets for ${FILE_NAME}") list(REMOVE_ITEM EX_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack+ gfx908:xnack- gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1200 gfx1201 gfx10-3-generic gfx11-generic gfx12-generic) endif() @@ -130,7 +123,7 @@ function(add_example_executable EXAMPLE_NAME FILE_NAME) target_link_libraries(${EXAMPLE_NAME} PRIVATE utility) target_link_libraries(${EXAMPLE_NAME} PRIVATE getopt::getopt) add_test(NAME ${EXAMPLE_NAME} COMMAND $ ${ARGN}) - set_property(TARGET ${EXAMPLE_NAME} PROPERTY HIP_ARCHITECTURES ${EX_TARGETS} ) + set_property(TARGET ${EXAMPLE_NAME} PROPERTY HIP_ARCHITECTURES ${EX_TARGETS}) add_dependencies(examples ${EXAMPLE_NAME}) add_dependencies(check ${EXAMPLE_NAME}) rocm_install(TARGETS ${EXAMPLE_NAME} COMPONENT examples) @@ -157,71 +150,71 @@ function(add_example_executable_no_testing EXAMPLE_NAME FILE_NAME) message(DEBUG "adding example ${EXAMPLE_NAME}") set(result 1) if(DEFINED DTYPES) - foreach(source IN LISTS FILE_NAME) - set(test 0) - if((source MATCHES "_fp16" OR source MATCHES "_f16") AND NOT "fp16" IN_LIST DTYPES) - set(test 1) - endif() - if((source MATCHES "_fp32" OR source MATCHES "_f32") AND NOT "fp32" IN_LIST DTYPES) - set(test 1) - endif() - if((source MATCHES "_fp64" OR source MATCHES "_f64") AND NOT "fp64" IN_LIST DTYPES) - set(test 1) - endif() - if((source MATCHES "_fp8" OR source MATCHES "_f8") AND NOT "fp8" IN_LIST DTYPES) - set(test 1) - endif() - if((source MATCHES "_bf8" OR source MATCHES "_bf8") AND NOT "bf8" IN_LIST DTYPES) - set(test 1) - endif() - if((source MATCHES "_bf16" OR source MATCHES "_b16") AND NOT "bf16" IN_LIST DTYPES) - set(test 1) - endif() - if((source MATCHES "_int8" OR source MATCHES "_i8") AND NOT "int8" IN_LIST DTYPES) - set(test 1) - endif() - if(test EQUAL 1) - message(DEBUG "removing example ${source} ") - list(REMOVE_ITEM FILE_NAME "${source}") - endif() - endforeach() + foreach(source IN LISTS FILE_NAME) + get_filename_component(source_name ${source} NAME) + set(test 0) + if((source_name MATCHES "_fp16" OR source_name MATCHES "_f16") AND NOT "fp16" IN_LIST DTYPES) + set(test 1) + endif() + if((source_name MATCHES "_fp32" OR source_name MATCHES "_f32") AND NOT "fp32" IN_LIST DTYPES) + set(test 1) + endif() + if((source_name MATCHES "_fp64" OR source_name MATCHES "_f64") AND NOT "fp64" IN_LIST DTYPES) + set(test 1) + endif() + if((source_name MATCHES "_fp8" OR source_name MATCHES "_f8") AND NOT "fp8" IN_LIST DTYPES) + set(test 1) + endif() + if((source_name MATCHES "_bf8" OR source_name MATCHES "_bf8") AND NOT "bf8" IN_LIST DTYPES) + set(test 1) + endif() + if((source_name MATCHES "_bf16" OR source_name MATCHES "_b16") AND NOT "bf16" IN_LIST DTYPES) + set(test 1) + endif() + if((source_name MATCHES "_int8" OR source_name MATCHES "_i8") AND NOT "int8" IN_LIST DTYPES) + set(test 1) + endif() + if(test EQUAL 1) + message(DEBUG "removing example ${source} ") + list(REMOVE_ITEM FILE_NAME "${source}") + endif() + endforeach() endif() set(EX_TARGETS ${SUPPORTED_GPU_TARGETS}) - #Do not build any DL examples if DL_KERNELS not set + set(source_name_list "") foreach(source IN LISTS FILE_NAME) - if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl") + get_filename_component(source_name ${source} NAME) + #Do not build any DL examples if DL_KERNELS not set + if(NOT DEFINED DL_KERNELS AND source_name MATCHES "_dl") message(DEBUG "removing dl example ${source} ") list(REMOVE_ITEM FILE_NAME "${source}") endif() - endforeach() - #Do not build any XDL examples if gfx9 targets are not on the list - foreach(source IN LISTS FILE_NAME) - if(NOT EX_TARGETS MATCHES "gfx9" AND source MATCHES "_xdl") + #Do not build any XDL examples if gfx9 targets are not on the list + if(NOT EX_TARGETS MATCHES "gfx9" AND source_name MATCHES "_xdl") message(DEBUG "removing xdl example ${source} ") list(REMOVE_ITEM FILE_NAME "${source}") endif() - endforeach() - #Do not build any WMMA examples if gfx11 targets are not on the list - foreach(source IN LISTS FILE_NAME) - if(NOT EX_TARGETS MATCHES "gfx11" AND NOT EX_TARGETS MATCHES "gfx12" AND source MATCHES "_wmma") + #Do not build any WMMA examples if gfx11 targets are not on the list + if(NOT EX_TARGETS MATCHES "gfx11" AND NOT EX_TARGETS MATCHES "gfx12" AND source_name MATCHES "_wmma") message(DEBUG "removing wmma example ${source} ") list(REMOVE_ITEM FILE_NAME "${source}") endif() + list(APPEND source_name_list ${source_name}) endforeach() #only continue if there are some source files left on the list if(FILE_NAME) - if(FILE_NAME MATCHES "_xdl") + if(source_name_list MATCHES "_xdl") list(REMOVE_ITEM EX_TARGETS gfx900 gfx906 gfx906:xnack- gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1200 gfx1201 gfx10-3-generic gfx11-generic gfx12-generic) - elseif(FILE_NAME MATCHES "_wmma") + elseif(source_name_list MATCHES "_wmma") list(REMOVE_ITEM EX_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack+ gfx908:xnack- gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx942 gfx1030 gfx950) endif() set_source_files_properties(${FILE_NAME} PROPERTIES LANGUAGE HIP) add_executable(${EXAMPLE_NAME} ${FILE_NAME}) target_link_libraries(${EXAMPLE_NAME} PRIVATE utility) add_dependencies(examples ${EXAMPLE_NAME}) - set_property(TARGET ${EXAMPLE_NAME} PROPERTY HIP_ARCHITECTURES ${EX_TARGETS} ) + set_property(TARGET ${EXAMPLE_NAME} PROPERTY HIP_ARCHITECTURES ${EX_TARGETS}) rocm_install(TARGETS ${EXAMPLE_NAME} COMPONENT examples) set(result 0) endif() diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt index d1466206f0..90e8dc0221 100644 --- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt @@ -3,6 +3,7 @@ function(add_instance_library INSTANCE_NAME) set(result 1) if(DEFINED DTYPES) foreach(source IN LISTS ARGN) + get_filename_component(source_name ${source} NAME) set(test 0) foreach(type IN LISTS DTYPES) if(type MATCHES "fp16") @@ -19,13 +20,13 @@ function(add_instance_library INSTANCE_NAME) set(type1 "_i8") endif() #make an exception for reduction kernels - if("${source}" MATCHES "${type}" OR "${source}" MATCHES "${type1}" OR "${source}" MATCHES "device_reduce_instance" OR ${source} MATCHES "device_image_to_column") + if("${source_name}" MATCHES "${type}" OR "${source_name}" MATCHES "${type1}" OR "${source_name}" MATCHES "device_reduce_instance" OR ${source_name} MATCHES "device_image_to_column") #if filename matches any selected type, exit type loop and do no exclude the file from the list set(test 0) break() - elseif((source MATCHES "fp8" OR source MATCHES "fp32" OR source MATCHES "fp64" OR source MATCHES "bf16" OR source MATCHES "int8" OR source MATCHES "fp16" OR - source MATCHES "_f8" OR source MATCHES "_f32" OR source MATCHES "_f64" OR source MATCHES "_i8" OR source MATCHES "_f16" OR source MATCHES "_b16") AND - NOT(source MATCHES type OR source MATCHES type1)) + elseif((source_name MATCHES "fp8" OR source_name MATCHES "fp32" OR source_name MATCHES "fp64" OR source_name MATCHES "bf16" OR source_name MATCHES "int8" OR source_name MATCHES "fp16" OR + source_name MATCHES "_f8" OR source_name MATCHES "_f32" OR source_name MATCHES "_f64" OR source_name MATCHES "_i8" OR source_name MATCHES "_f16" OR source_name MATCHES "_b16") AND + NOT (source_name MATCHES type OR source_name MATCHES type1)) #if filename contains a type which doesn't match any selected type, mark it for removal set(test 1) endif() @@ -39,66 +40,52 @@ function(add_instance_library INSTANCE_NAME) set(INST_TARGETS ${SUPPORTED_GPU_TARGETS}) - # Do not build DPP instances if DPP_KERNELS macro is not set foreach(source IN LISTS ARGN) - if(NOT DEFINED DPP_KERNELS AND source MATCHES "_dpp") + get_filename_component(source_name ${source} NAME) + + # Do not build DPP instances if DPP_KERNELS macro is not set + if(NOT DEFINED DPP_KERNELS AND source_name MATCHES "_dpp") message(DEBUG "removing dpp instance ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() - endforeach() - # Do not build DL instances if DL_KERNELS macro is not set - foreach(source IN LISTS ARGN) - if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl") + # Do not build DL instances if DL_KERNELS macro is not set + if(NOT DEFINED DL_KERNELS AND source_name MATCHES "_dl") message(DEBUG "removing dl instance ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() - endforeach() - # Do not build XDL instances if gfx9 targets are not on the target list - foreach(source IN LISTS ARGN) - if(NOT INST_TARGETS MATCHES "gfx9" AND source MATCHES "_xdl") + # Do not build XDL instances if gfx9 targets are not on the target list + if(NOT INST_TARGETS MATCHES "gfx9" AND source_name MATCHES "_xdl") message(DEBUG "removing xdl instance ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() - endforeach() - # Do not build MX instances if gfx950 targets are not on the target list - foreach(source IN LISTS ARGN) - if(NOT INST_TARGETS MATCHES "gfx950" AND source MATCHES "_mx") + # Do not build MX instances if gfx950 targets are not on the target list + if(NOT INST_TARGETS MATCHES "gfx950" AND source_name MATCHES "_mx") message(DEBUG "removing MX instance ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() - endforeach() - # Do not build WMMA instances if gfx11 targets are not on the target list - foreach(source IN LISTS ARGN) - if(NOT INST_TARGETS MATCHES "gfx11" AND NOT INST_TARGETS MATCHES "gfx12" AND source MATCHES "_wmma") + # Do not build WMMA instances if gfx11 targets are not on the target list + if(NOT INST_TARGETS MATCHES "gfx11" AND NOT INST_TARGETS MATCHES "gfx12" AND source_name MATCHES "_wmma") message(DEBUG "removing wmma instance ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() - endforeach() - # Do not build mha instances if gfx94 or gfx90a targets are not on the target list - foreach(source IN LISTS ARGN) - if((NOT BUILD_MHA_LIB OR (NOT INST_TARGETS MATCHES "gfx94" AND NOT INST_TARGETS MATCHES "gfx90a" AND NOT INST_TARGETS MATCHES "gfx95")) AND source MATCHES "mha") - message(DEBUG "removing mha instance ${source} ") - list(REMOVE_ITEM ARGN "${source}") - endif() - endforeach() - # Do not build XDL gemm_universal_f8 or gemm_multiply_multiply_f8 for any targets except gfx94 - if(NOT CK_USE_FP8_ON_UNSUPPORTED_ARCH) - foreach(source IN LISTS ARGN) - if(NOT INST_TARGETS MATCHES "gfx94" AND NOT INST_TARGETS MATCHES "gfx95" AND source MATCHES "gemm_multiply_multiply" AND source MATCHES "_f8_") + # Do not build mha instances if gfx94 or gfx90a targets are not on the target list + if((NOT BUILD_MHA_LIB OR (NOT INST_TARGETS MATCHES "gfx94" AND NOT INST_TARGETS MATCHES "gfx90a" AND NOT INST_TARGETS MATCHES "gfx95")) AND source_name MATCHES "mha") + message(DEBUG "removing mha instance ${source} ") + list(REMOVE_ITEM ARGN "${source}") + endif() + # Do not build XDL gemm_universal_f8 or gemm_multiply_multiply_f8 for any targets except gfx94 + if(NOT CK_USE_FP8_ON_UNSUPPORTED_ARCH) + if(NOT INST_TARGETS MATCHES "gfx94" AND NOT INST_TARGETS MATCHES "gfx95" AND source_name MATCHES "gemm_multiply_multiply" AND source_name MATCHES "_f8_") message(DEBUG "removing gemm_multiply_multiply_f8 instance ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() - endforeach() - foreach(source IN LISTS ARGN) - if(NOT INST_TARGETS MATCHES "gfx94" AND NOT INST_TARGETS MATCHES "gfx95" AND source MATCHES "gemm_xdl_universal" AND source MATCHES "_f8_") + if(NOT INST_TARGETS MATCHES "gfx94" AND NOT INST_TARGETS MATCHES "gfx95" AND source_name MATCHES "gemm_xdl_universal" AND source_name MATCHES "_f8_") message(DEBUG "removing gemm_universal_f8 instance ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() - endforeach() - endif() - # Do not build WMMA gemm_universal_f8 for any targets except gfx12+ - foreach(source IN LISTS ARGN) - if(NOT INST_TARGETS MATCHES "gfx12" AND source MATCHES "gemm_wmma_universal" AND source MATCHES "_f8_") + endif() + # Do not build WMMA gemm_universal_f8 for any targets except gfx12+ + if(NOT INST_TARGETS MATCHES "gfx12" AND source_name MATCHES "gemm_wmma_universal" AND source_name MATCHES "_f8_") message(DEBUG "removing gemm_universal_f8 instance ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() @@ -109,41 +96,43 @@ function(add_instance_library INSTANCE_NAME) if(ARGN) set(INST_OBJ) foreach(source IN LISTS ARGN) + get_filename_component(source_name ${source} NAME) + set(INST_TARGETS ${SUPPORTED_GPU_TARGETS}) - if(source MATCHES "_xdl") + if(source_name MATCHES "_xdl") list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx906:xnack- gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1200 gfx1201 gfx10-3-generic gfx11-generic gfx12-generic) - elseif(source MATCHES "_wmma") + elseif(source_name MATCHES "_wmma") list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack+ gfx908:xnack- gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx942 gfx1030 gfx950) - elseif(source MATCHES "mha") + elseif(source_name MATCHES "mha") list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack- gfx908:xnack+ gfx908 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1200 gfx1201 gfx10-3-generic gfx11-generic gfx12-generic) endif() - if(source MATCHES "_mx") + if(source_name MATCHES "_mx") list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack- gfx908:xnack+ gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1200 gfx1201 gfx10-3-generic gfx11-generic gfx12-generic) endif() #only build the fp8 gemm instances for gfx90a if the build argument is set, otherwise only build for gfx942/gfx950 if(NOT CK_USE_FP8_ON_UNSUPPORTED_ARCH) - if(source MATCHES "gemm_xdl_universal" AND source MATCHES "f8") + if(source_name MATCHES "gemm_xdl_universal" AND source_name MATCHES "f8") list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack- gfx908:xnack+ gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1200 gfx1201 gfx10-3-generic gfx11-generic gfx12-generic) endif() - if(source MATCHES "gemm_multiply_multiply" AND source MATCHES "f8") + if(source_name MATCHES "gemm_multiply_multiply" AND source_name MATCHES "f8") list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack- gfx908:xnack+ gfx90a:xnack+ gfx90a:xnack- gfx908 gfx90a gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1200 gfx1201 gfx10-3-generic gfx11-generic gfx12-generic) endif() else() - if(source MATCHES "gemm_xdl_universal" AND source MATCHES "f8") + if(source_name MATCHES "gemm_xdl_universal" AND source_name MATCHES "f8") list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack- gfx908:xnack+ gfx908 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1200 gfx1201 gfx10-3-generic gfx11-generic gfx12-generic) endif() - if(source MATCHES "gemm_multiply_multiply" AND source MATCHES "f8") + if(source_name MATCHES "gemm_multiply_multiply" AND source_name MATCHES "f8") list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx906:xnack- gfx908:xnack- gfx908:xnack+ gfx908 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1200 gfx1201 gfx10-3-generic gfx11-generic gfx12-generic) endif() endif() - if(source MATCHES "gemm_wmma_universal" AND source MATCHES "f8") + if(source_name MATCHES "gemm_wmma_universal" AND source_name MATCHES "f8") list(FILTER INST_TARGETS INCLUDE REGEX "gfx12") endif() set(offload_targets) foreach(target IN LISTS INST_TARGETS) - string(APPEND offload_targets "--offload-arch=${target} ") + string(APPEND offload_targets "--offload-arch=${target} ") endforeach() set_source_files_properties(${source} PROPERTIES COMPILE_FLAGS ${offload_targets}) list(APPEND INST_OBJ ${source}) @@ -165,7 +154,7 @@ function(add_instance_library INSTANCE_NAME) list(APPEND FMHA_COMPILE_OPTIONS -DCK_TILE_FMHA_FWD_APPENDKV_API=1) target_compile_options(device_mha_instance PRIVATE ${FMHA_COMPILE_OPTIONS}) endif() - + target_compile_features(${INSTANCE_NAME} PUBLIC) # flags to compress the library