From beb7927f5202f83cb27fae3473ed7dde0c98a004 Mon Sep 17 00:00:00 2001 From: Illia Silin <98187287+illsilin@users.noreply.github.com> Date: Wed, 22 May 2024 11:45:27 -0700 Subject: [PATCH] Select appropriate GPU targets for instances, tests, and examples. (#1304) * set individual gpu targets for instances, examples, tests * fix path to hip compiler * fix path to hip compiler once more * aggregate device macros in ck_tile config header * fix the cmake logic for instances * fix clang format * add gfx900 and gfx906 to default set of targets [ROCm/composable_kernel commit: 7b027d5643b3e0cf15bd13ea85c4f09a0675f6c1] --- CMakeLists.txt | 16 +++--- example/CMakeLists.txt | 35 +++++++++++-- .../gpu/CMakeLists.txt | 50 ++++++++++++++++--- test/CMakeLists.txt | 36 +++++++++++-- 4 files changed, 115 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c23746e7f3..3f9e445837 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,7 @@ endif() set(version 1.1.0) # Check support for CUDA/HIP in Cmake -project(composable_kernel VERSION ${version} LANGUAGES CXX) +project(composable_kernel VERSION ${version} LANGUAGES CXX HIP) include(CTest) find_package(Python3 3.6 COMPONENTS Interpreter REQUIRED) @@ -112,7 +112,7 @@ message("checking which targets are supported") #Setting GPU_TARGETS on command line will override this list if(NOT PROFILER_ONLY) rocm_check_target_ids(DEFAULT_GPU_TARGETS - TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102") + TARGETS "gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102") else() add_definitions(-DPROFILER_ONLY) set(GPU_TARGETS "" CACHE STRING "" FORCE) @@ -135,12 +135,10 @@ endif() message("Supported GPU_TARGETS= ${DEFAULT_GPU_TARGETS}") -set(AMDGPU_TARGETS "${DEFAULT_GPU_TARGETS}" CACHE STRING " " FORCE) - if(GPU_TARGETS) message("Building CK for the following targets: ${GPU_TARGETS}") else() - message("Building CK for the following targets: ${AMDGPU_TARGETS}") 
+ message("Building CK for the default targets: ${DEFAULT_GPU_TARGETS}") endif() if (GPU_TARGETS) @@ -225,7 +223,13 @@ link_libraries(Threads::Threads) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -message("CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}") +message("CMAKE_CXX_COMPILER: ${CMAKE_CXX_COMPILER}") + +## HIP +set(CMAKE_HIP_PLATFORM amd) +set(CMAKE_HIP_COMPILER ${CMAKE_CXX_COMPILER}) +set(CMAKE_HIP_EXTENSIONS ON) +message("CMAKE_HIP_COMPILER: ${CMAKE_HIP_COMPILER}") ## OpenMP if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 5465adb779..fd9f5cd89d 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -44,6 +44,13 @@ function(add_example_executable EXAMPLE_NAME FILE_NAME) endif() endforeach() endif() + + if(INSTANCES_ONLY) + set(EX_TARGETS ${DEFAULT_GPU_TARGETS}) + else() + set(EX_TARGETS ${GPU_TARGETS}) + endif() + #Do not build any DL examples if DL_KERNELS not set foreach(source IN LISTS FILE_NAME) if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl") @@ -53,23 +60,30 @@ function(add_example_executable EXAMPLE_NAME FILE_NAME) endforeach() #Do not build any XDL examples if gfx9 targets are not on the list foreach(source IN LISTS FILE_NAME) - if(NOT GPU_TARGETS MATCHES "gfx9" AND source MATCHES "_xdl") + if(NOT EX_TARGETS MATCHES "gfx9" AND source MATCHES "_xdl") message("removing xdl example ${source} ") list(REMOVE_ITEM FILE_NAME "${source}") endif() endforeach() #Do not build any WMMA examples if gfx11 targets are not on the list foreach(source IN LISTS FILE_NAME) - if(NOT GPU_TARGETS MATCHES "gfx11" AND source MATCHES "_wmma") + if(NOT EX_TARGETS MATCHES "gfx11" AND source MATCHES "_wmma") message("removing wmma example ${source} ") list(REMOVE_ITEM FILE_NAME "${source}") endif() endforeach() #only continue if there are some source files left on the list if(FILE_NAME) + if(FILE_NAME MATCHES "_xdl") + list(REMOVE_ITEM EX_TARGETS 
gfx1030 gfx1100 gfx1101 gfx1102 gfx1103) + elseif(FILE_NAME MATCHES "_wmma") + list(REMOVE_ITEM EX_TARGETS gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030) + endif() + set_source_files_properties(${FILE_NAME} PROPERTIES LANGUAGE HIP) add_executable(${EXAMPLE_NAME} ${FILE_NAME}) target_link_libraries(${EXAMPLE_NAME} PRIVATE utility) add_test(NAME ${EXAMPLE_NAME} COMMAND $<TARGET_FILE:${EXAMPLE_NAME}> ${ARGN}) + set_property(TARGET ${EXAMPLE_NAME} PROPERTY HIP_ARCHITECTURES ${EX_TARGETS} ) add_dependencies(examples ${EXAMPLE_NAME}) add_dependencies(check ${EXAMPLE_NAME}) rocm_install(TARGETS ${EXAMPLE_NAME} COMPONENT examples) @@ -118,6 +132,12 @@ function(add_example_executable_no_testing EXAMPLE_NAME FILE_NAME) endif() endforeach() endif() + + if(INSTANCES_ONLY) + set(EX_TARGETS ${DEFAULT_GPU_TARGETS}) + else() + set(EX_TARGETS ${GPU_TARGETS}) + endif() #Do not build any DL examples if DL_KERNELS not set foreach(source IN LISTS FILE_NAME) if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl") @@ -127,23 +147,30 @@ function(add_example_executable_no_testing EXAMPLE_NAME FILE_NAME) endforeach() #Do not build any XDL examples if gfx9 targets are not on the list foreach(source IN LISTS FILE_NAME) - if(NOT GPU_TARGETS MATCHES "gfx9" AND source MATCHES "_xdl") + if(NOT EX_TARGETS MATCHES "gfx9" AND source MATCHES "_xdl") message("removing xdl example ${source} ") list(REMOVE_ITEM FILE_NAME "${source}") endif() endforeach() #Do not build any WMMA examples if gfx11 targets are not on the list foreach(source IN LISTS FILE_NAME) - if(NOT GPU_TARGETS MATCHES "gfx11" AND source MATCHES "_wmma") + if(NOT EX_TARGETS MATCHES "gfx11" AND source MATCHES "_wmma") message("removing wmma example ${source} ") list(REMOVE_ITEM FILE_NAME "${source}") endif() endforeach() #only continue if there are some source files left on the list if(FILE_NAME) + if(FILE_NAME MATCHES "_xdl") + list(REMOVE_ITEM EX_TARGETS gfx900 gfx906 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103) + elseif(FILE_NAME MATCHES "_wmma") + list(REMOVE_ITEM 
EX_TARGETS gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030) + endif() + set_source_files_properties(${FILE_NAME} PROPERTIES LANGUAGE HIP) add_executable(${EXAMPLE_NAME} ${FILE_NAME}) target_link_libraries(${EXAMPLE_NAME} PRIVATE utility) add_dependencies(examples ${EXAMPLE_NAME}) + set_property(TARGET ${EXAMPLE_NAME} PROPERTY HIP_ARCHITECTURES ${EX_TARGETS} ) rocm_install(TARGETS ${EXAMPLE_NAME} COMPONENT examples) set(result 0) endif() diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt index c035e7e564..05b8c035c4 100644 --- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt @@ -36,6 +36,13 @@ function(add_instance_library INSTANCE_NAME) endif() endforeach() endif() + + if(INSTANCES_ONLY) + set(INST_TARGETS ${DEFAULT_GPU_TARGETS}) + else() + set(INST_TARGETS ${GPU_TARGETS}) + endif() + # Do not build DL instances if DL_KERNELS macro is not set foreach(source IN LISTS ARGN) if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl") @@ -45,21 +52,40 @@ function(add_instance_library INSTANCE_NAME) endforeach() # Do not build XDL instances if gfx9 targets are not on the target list foreach(source IN LISTS ARGN) - if(NOT GPU_TARGETS MATCHES "gfx9" AND source MATCHES "_xdl") + if(NOT INST_TARGETS MATCHES "gfx9" AND source MATCHES "_xdl") message("removing xdl instance ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() endforeach() # Do not build WMMA instances if gfx11 targets are not on the target list foreach(source IN LISTS ARGN) - if(NOT GPU_TARGETS MATCHES "gfx11" AND source MATCHES "_wmma") + if(NOT INST_TARGETS MATCHES "gfx11" AND source MATCHES "_wmma") message("removing wmma instance ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() endforeach() #only continue if there are some source files left on the list if(ARGN) - add_library(${INSTANCE_NAME} OBJECT ${ARGN}) + set(INST_OBJ) + foreach(source IN 
LISTS ARGN) + if(INSTANCES_ONLY) + set(INST_TARGETS ${DEFAULT_GPU_TARGETS}) + else() + set(INST_TARGETS ${GPU_TARGETS}) + endif() + if(source MATCHES "_xdl") + list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103) + elseif(ARGN MATCHES "_wmma") + list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030) + endif() + set(offload_targets) + foreach(target IN LISTS INST_TARGETS) + string(APPEND offload_targets "--offload-arch=${target} ") + endforeach() + set_source_files_properties(${source} PROPERTIES COMPILE_FLAGS ${offload_targets}) + list(APPEND INST_OBJ ${source}) + endforeach() + add_library(${INSTANCE_NAME} OBJECT ${INST_OBJ}) target_compile_features(${INSTANCE_NAME} PUBLIC) set_target_properties(${INSTANCE_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) clang_tidy_check(${INSTANCE_NAME}) @@ -131,6 +157,14 @@ FOREACH(subdir_path ${dir_list}) if(NOT DEFINED DTYPES) set(add_inst 1) endif() + + if(INSTANCES_ONLY) + set(INST_TARGETS ${DEFAULT_GPU_TARGETS}) + else() + set(INST_TARGETS ${GPU_TARGETS}) + endif() + + if(("${cmake_instance}" MATCHES "quantization") AND (DEFINED DTYPES) AND (NOT DTYPES MATCHES "int8")) message("quantization instances will not be built!") set(add_inst 0) @@ -139,23 +173,23 @@ FOREACH(subdir_path ${dir_list}) message("Found only dl instances, but DL_KERNELS is not set. Skipping.") set(add_inst 0) endif() - if(("${cmake_instance}" MATCHES "ONLY XDL_KERNELS") AND (NOT GPU_TARGETS MATCHES "gfx9")) + if(("${cmake_instance}" MATCHES "ONLY XDL_KERNELS") AND (NOT INST_TARGETS MATCHES "gfx9")) message("Found only xdl instances, but gfx9 is not on the targets list. Skipping.") set(add_inst 0) endif() - if(("${cmake_instance}" MATCHES "ONLY WMMA_KERNELS") AND (NOT GPU_TARGETS MATCHES "gfx11")) + if(("${cmake_instance}" MATCHES "ONLY WMMA_KERNELS") AND (NOT INST_TARGETS MATCHES "gfx11")) message("Found only wmma instances, but gfx11 is not on the targets list. 
Skipping.") set(add_inst 0) endif() - if(("${cmake_instance}" MATCHES "ONLY XDL_AND_DL_KERNELS") AND (NOT DEFINED DL_KERNELS) AND (NOT GPU_TARGETS MATCHES "gfx9")) + if(("${cmake_instance}" MATCHES "ONLY XDL_AND_DL_KERNELS") AND (NOT DEFINED DL_KERNELS) AND (NOT INST_TARGETS MATCHES "gfx9")) message("Found only xdl and dl instances, but gfx9 is not on the targets listand DL_KERNELS is not set. Skipping.") set(add_inst 0) endif() - if(("${cmake_instance}" MATCHES "ONLY XDL_AND_WMMA_KERNELS") AND (NOT GPU_TARGETS MATCHES "gfx11") AND (NOT GPU_TARGETS MATCHES "gfx9")) + if(("${cmake_instance}" MATCHES "ONLY XDL_AND_WMMA_KERNELS") AND (NOT INST_TARGETS MATCHES "gfx11") AND (NOT INST_TARGETS MATCHES "gfx9")) message("Found only xdl and wmma instances, but gfx11 and gfx9 are not on the targets list. Skipping.") set(add_inst 0) endif() - if(("${cmake_instance}" MATCHES "XDL_DL_WMMA_KERNELS") AND (NOT GPU_TARGETS MATCHES "gfx11") AND (NOT GPU_TARGETS MATCHES "gfx9") AND (NOT DEFINED DL_KERNELS)) + if(("${cmake_instance}" MATCHES "XDL_DL_WMMA_KERNELS") AND (NOT INST_TARGETS MATCHES "gfx11") AND (NOT INST_TARGETS MATCHES "gfx9") AND (NOT DEFINED DL_KERNELS)) message("Found xdl, dl, and wmma instances, but none of those meet the target list. 
Skipping.") set(add_inst 0) endif() diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 25c63ac7fe..49b67992b1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -40,6 +40,13 @@ function(add_test_executable TEST_NAME) endif() endforeach() endif() + + if(INSTANCES_ONLY) + set(TEST_TARGETS ${DEFAULT_GPU_TARGETS}) + else() + set(TEST_TARGETS ${GPU_TARGETS}) + endif() + foreach(source IN LISTS ARGN) if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl") message("removing dl test ${source} ") @@ -47,20 +54,27 @@ function(add_test_executable TEST_NAME) endif() endforeach() foreach(source IN LISTS ARGN) - if(NOT GPU_TARGETS MATCHES "gfx9" AND source MATCHES "xdl") + if(NOT TEST_TARGETS MATCHES "gfx9" AND source MATCHES "xdl") message("removing xdl test ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() endforeach() foreach(source IN LISTS ARGN) - if(NOT GPU_TARGETS MATCHES "gfx11" AND source MATCHES "wmma") + if(NOT TEST_TARGETS MATCHES "gfx11" AND source MATCHES "wmma") message("removing wmma test ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() endforeach() #only continue if there are some source files left on the list if(ARGN) + if(ARGN MATCHES "_xdl") + list(REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103) + elseif(ARGN MATCHES "_wmma") + list(REMOVE_ITEM TEST_TARGETS gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030) + endif() + set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP) add_executable(${TEST_NAME} ${ARGN}) + set_property(TARGET ${TEST_NAME} PROPERTY HIP_ARCHITECTURES ${TEST_TARGETS} ) target_link_libraries(${TEST_NAME} PRIVATE getopt::getopt) add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}>) add_dependencies(tests ${TEST_NAME}) @@ -105,6 +119,13 @@ function(add_gtest_executable TEST_NAME) endif() endforeach() endif() + + if(INSTANCES_ONLY) + set(TEST_TARGETS ${DEFAULT_GPU_TARGETS}) + else() + set(TEST_TARGETS ${GPU_TARGETS}) + endif() + foreach(source IN LISTS ARGN) if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl") 
message("removing dl test ${source} ") @@ -112,20 +133,27 @@ function(add_gtest_executable TEST_NAME) endif() endforeach() foreach(source IN LISTS ARGN) - if(NOT GPU_TARGETS MATCHES "gfx9" AND source MATCHES "xdl") + if(NOT TEST_TARGETS MATCHES "gfx9" AND source MATCHES "xdl") message("removing xdl test ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() endforeach() foreach(source IN LISTS ARGN) - if(NOT GPU_TARGETS MATCHES "gfx11" AND source MATCHES "wmma") + if(NOT TEST_TARGETS MATCHES "gfx11" AND source MATCHES "wmma") message("removing wmma test ${source} ") list(REMOVE_ITEM ARGN "${source}") endif() endforeach() #only continue if there are some source files left on the list if(ARGN) + if(ARGN MATCHES "_xdl") + list(REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103) + elseif(ARGN MATCHES "_wmma") + list(REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030) + endif() + set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP) add_executable(${TEST_NAME} ${ARGN}) + set_property(TARGET ${TEST_NAME} PROPERTY HIP_ARCHITECTURES ${TEST_TARGETS} ) add_dependencies(tests ${TEST_NAME}) add_dependencies(check ${TEST_NAME})