mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
Fix build logic using GPU_ARCHS. (#1536)
* update build logic with GPU_ARCHS
* fix the GPU_ARCHS build for codegen
* unset GPU_TARGETS when GPU_ARCHS are set
[ROCm/composable_kernel commit: 7d8ea5f08b]
This commit is contained in:
101
CMakeLists.txt
101
CMakeLists.txt
@@ -98,11 +98,6 @@ if(DL_KERNELS)
|
||||
set(CK_ENABLE_DL_KERNELS "ON")
|
||||
endif()
|
||||
|
||||
if(INSTANCES_ONLY)
|
||||
add_definitions(-DINSTANCES_ONLY)
|
||||
set(CK_ENABLE_INSTANCES_ONLY "ON")
|
||||
endif()
|
||||
|
||||
include(getopt)
|
||||
|
||||
# CK version file to record release version as well as git commit hash
|
||||
@@ -127,6 +122,12 @@ rocm_setup_version(VERSION ${version})
|
||||
list(APPEND CMAKE_PREFIX_PATH ${CMAKE_INSTALL_PREFIX} ${CMAKE_INSTALL_PREFIX}/llvm ${CMAKE_INSTALL_PREFIX}/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip "$ENV{ROCM_PATH}" "$ENV{HIP_PATH}")
|
||||
|
||||
message("GPU_TARGETS= ${GPU_TARGETS}")
|
||||
message("GPU_ARCHS= ${GPU_ARCHS}")
|
||||
if(GPU_ARCHS)
|
||||
#disable GPU_TARGETS to avoid conflicts; this needs to happen before we call find_package(hip)
|
||||
unset(GPU_TARGETS CACHE)
|
||||
unset(AMDGPU_TARGETS CACHE)
|
||||
endif()
|
||||
|
||||
find_package(hip)
|
||||
# No assumption that HIP kernels are launched with uniform block size for backward compatibility
|
||||
@@ -135,55 +136,38 @@ math(EXPR hip_VERSION_FLAT "(${hip_VERSION_MAJOR} * 1000 + ${hip_VERSION_MINOR})
|
||||
message("hip_version_flat=${hip_VERSION_FLAT}")
|
||||
|
||||
message("checking which targets are supported")
|
||||
#This is the list of targets to be used in case GPU_TARGETS is not set on command line
|
||||
#These targets will be filtered and only supported ones will be used
|
||||
#Setting GPU_TARGETS on command line will override this list
|
||||
if(NOT PROFILER_ONLY)
|
||||
if(NOT ENABLE_ASAN_PACKAGING)
|
||||
#build CK for all supported targets
|
||||
if(NOT WIN32 AND ${hip_VERSION_FLAT} LESS 600300000)
|
||||
# WORKAROUND: compiler does not yet fully support gfx12 targets, need to fix version above
|
||||
rocm_check_target_ids(DEFAULT_GPU_TARGETS
|
||||
TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102")
|
||||
else()
|
||||
rocm_check_target_ids(DEFAULT_GPU_TARGETS
|
||||
TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201")
|
||||
endif()
|
||||
#In order to build just the CK library (without tests and examples) for all supported GPU targets
|
||||
#use -D GPU_ARCHS="gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||
#the GPU_TARGETS flag will be reset in this case in order to avoid conflicts.
|
||||
#
|
||||
#In order to build CK along with all tests and examples it should be OK to set GPU_TARGETS to just 1 or 2 similar architectures.
|
||||
if(NOT ENABLE_ASAN_PACKAGING)
|
||||
if(NOT WIN32 AND ${hip_VERSION_FLAT} LESS 600300000)
|
||||
# WORKAROUND: compiler does not yet fully support gfx12 targets, need to fix version above
|
||||
set(CK_GPU_TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102")
|
||||
else()
|
||||
#build CK only for xnack-supported targets
|
||||
rocm_check_target_ids(DEFAULT_GPU_TARGETS
|
||||
TARGETS "gfx908:xnack+;gfx90a:xnack+;gfx940:xnack+;gfx941:xnack+;gfx942:xnack+")
|
||||
set(GPU_TARGETS "${DEFAULT_GPU_TARGETS}" CACHE STRING " " FORCE)
|
||||
set(CK_GPU_TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201")
|
||||
endif()
|
||||
else()
|
||||
add_definitions(-DPROFILER_ONLY)
|
||||
set(GPU_TARGETS "" CACHE STRING "" FORCE)
|
||||
#build CK only for xnack-supported targets when using ASAN
|
||||
set(CK_GPU_TARGETS "gfx908:xnack+;gfx90a:xnack+;gfx940:xnack+;gfx941:xnack+;gfx942:xnack+")
|
||||
endif()
|
||||
|
||||
#if user set GPU_ARCHS on the cmake command line, overwrite default target list with user's list
|
||||
#otherwise, if user set GPU_TARGETS, use that set of targets
|
||||
if(GPU_ARCHS)
|
||||
set(CK_GPU_TARGETS ${GPU_ARCHS})
|
||||
else()
|
||||
if(GPU_TARGETS)
|
||||
message(FATAL_ERROR "For PROFILE_ONLY build, please do not set GPU_TARGETS, use GPU_ARCH = gfx90, gfx94, gfx10, gfx11 or gfx12")
|
||||
set(CK_GPU_TARGETS ${GPU_TARGETS})
|
||||
endif()
|
||||
if(GPU_ARCH MATCHES "gfx90")
|
||||
rocm_check_target_ids(DEFAULT_GPU_TARGETS TARGETS "gfx908;gfx90a")
|
||||
elseif(GPU_ARCH MATCHES "gfx94")
|
||||
rocm_check_target_ids(DEFAULT_GPU_TARGETS TARGETS "gfx940;gfx941;gfx942")
|
||||
elseif(GPU_ARCH MATCHES "gfx10")
|
||||
rocm_check_target_ids(DEFAULT_GPU_TARGETS TARGETS "gfx1030")
|
||||
elseif(GPU_ARCH MATCHES "gfx11")
|
||||
rocm_check_target_ids(DEFAULT_GPU_TARGETS TARGETS "gfx1100;gfx1101;gfx1102")
|
||||
elseif(GPU_ARCH MATCHES "gfx12")
|
||||
rocm_check_target_ids(DEFAULT_GPU_TARGETS TARGETS "gfx1200;gfx1201")
|
||||
else()
|
||||
message(FATAL_ERROR "For PROFILE_ONLY build, please specify GPU_ARCH as gfx90, gfx94, gfx10, gfx11 or gfx12")
|
||||
endif()
|
||||
set(GPU_TARGETS "${DEFAULT_GPU_TARGETS}" CACHE STRING " " FORCE)
|
||||
endif()
|
||||
|
||||
message("Supported GPU_TARGETS= ${DEFAULT_GPU_TARGETS}")
|
||||
#make sure all the targets on the list are actually supported by the current compiler
|
||||
rocm_check_target_ids(SUPPORTED_GPU_TARGETS
|
||||
TARGETS ${CK_GPU_TARGETS})
|
||||
|
||||
if(GPU_TARGETS)
|
||||
message("Building CK for the following targets: ${GPU_TARGETS}")
|
||||
else()
|
||||
message("Building CK for the default targets: ${DEFAULT_GPU_TARGETS}")
|
||||
endif()
|
||||
message("Building CK for the following targets: ${SUPPORTED_GPU_TARGETS}")
|
||||
|
||||
if (GPU_TARGETS)
|
||||
if (GPU_TARGETS MATCHES "gfx9")
|
||||
@@ -557,8 +541,7 @@ ENDFOREACH()
|
||||
add_custom_target(instances DEPENDS utility;${CK_DEVICE_INSTANCES} SOURCES ${INSTANCE_FILES})
|
||||
add_subdirectory(library)
|
||||
|
||||
if(NOT DEFINED INSTANCES_ONLY)
|
||||
if(NOT DEFINED PROFILER_ONLY)
|
||||
if(NOT GPU_ARCHS)
|
||||
rocm_package_setup_component(tests
|
||||
LIBRARY_NAME composablekernel
|
||||
PACKAGE_NAME tests # Prevent -static suffix on package name
|
||||
@@ -572,23 +555,15 @@ if(NOT DEFINED INSTANCES_ONLY)
|
||||
if(BUILD_TESTING)
|
||||
add_subdirectory(test)
|
||||
endif()
|
||||
|
||||
rocm_package_setup_component(profiler
|
||||
LIBRARY_NAME composablekernel
|
||||
PACKAGE_NAME ckprofiler
|
||||
)
|
||||
add_subdirectory(profiler)
|
||||
else()
|
||||
#When building PROFILER_ONLY, label the package with GPU_ARCH
|
||||
rocm_package_setup_component(profiler
|
||||
LIBRARY_NAME composablekernel
|
||||
PACKAGE_NAME ckprofiler_${GPU_ARCH}
|
||||
)
|
||||
add_subdirectory(profiler)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED PROFILER_ONLY AND (GPU_TARGETS MATCHES "gfx9" OR DEFINED INSTANCES_ONLY))
|
||||
rocm_package_setup_component(profiler
|
||||
LIBRARY_NAME composablekernel
|
||||
PACKAGE_NAME ckprofiler
|
||||
)
|
||||
add_subdirectory(profiler)
|
||||
|
||||
if(GPU_TARGETS MATCHES "gfx9" OR GPU_ARCHS)
|
||||
add_subdirectory(codegen)
|
||||
endif()
|
||||
|
||||
|
||||
4
Jenkinsfile
vendored
4
Jenkinsfile
vendored
@@ -1138,8 +1138,8 @@ pipeline {
|
||||
execute_args = """ cmake -D CMAKE_PREFIX_PATH=/opt/rocm \
|
||||
-D CMAKE_CXX_COMPILER="${build_compiler()}" \
|
||||
-D CMAKE_BUILD_TYPE=Release \
|
||||
-D INSTANCES_ONLY=ON \
|
||||
-DCMAKE_CXX_FLAGS=" -O3 " .. && make -j64 """
|
||||
-D GPU_ARCHS="gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" \
|
||||
-D CMAKE_CXX_FLAGS=" -O3 " .. && make -j64 """
|
||||
}
|
||||
steps{
|
||||
buildHipClangJobAndReboot(setup_cmd: "", build_cmd: "", no_reboot:true, build_type: 'Release', execute_cmd: execute_args)
|
||||
|
||||
11
README.md
11
README.md
@@ -90,7 +90,12 @@ Docker images are available on [DockerHub](https://hub.docker.com/r/rocm/composa
|
||||
```
|
||||
|
||||
If you don't set `GPU_TARGETS` on the cmake command line, CK is built for all GPU targets
|
||||
supported by the current compiler (this may take a long time).
|
||||
supported by the current compiler (this may take a long time).
|
||||
|
||||
NOTE: If you try setting `GPU_TARGETS` to a list of architectures, the build will only work if the
|
||||
architectures are similar, e.g., `gfx908;gfx90a`, or `gfx1100;gfx1101;gfx1102`. Otherwise, if you
|
||||
want to build the library for a list of different architectures,
|
||||
you should use the `GPU_ARCHS` build argument, for example `GPU_ARCHS=gfx908;gfx1030;gfx1100;gfx942`.
|
||||
|
||||
4. Build the entire CK library:
|
||||
|
||||
@@ -137,10 +142,6 @@ crash. In such cases, you can reduce the number of threads to 32 by using `-j32`
|
||||
|
||||
Additional cmake flags can be used to significantly speed-up the build:
|
||||
|
||||
* `INSTANCES_ONLY` (default is OFF) must be set to ON in order to build only the instances and library
|
||||
while skipping all tests, examples, and profiler. This is useful in cases when you plan to use CK as a
|
||||
dependency and don't plan to run any examples or tests.
|
||||
|
||||
* `DTYPES` (default is not set) can be set to any subset of "fp64;fp32;fp16;fp8;bf16;int8" to build
|
||||
instances of select data types only. The main default data types are fp32 and fp16; you can safely skip
|
||||
other data types.
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
|
||||
add_subdirectory(rtc)
|
||||
file(GLOB TEST_SRCS CONFIGURE_DEPENDS *.cpp)
|
||||
if(NOT INSTANCES_ONLY)
|
||||
# do not build the tests when we build the library for various targets
|
||||
if(NOT GPU_ARCHS)
|
||||
foreach(TEST_SRC ${TEST_SRCS})
|
||||
set_source_files_properties(${TEST_SRC} PROPERTIES LANGUAGE HIP)
|
||||
get_filename_component(BASE_NAME ${TEST_SRC} NAME_WE)
|
||||
|
||||
@@ -45,11 +45,7 @@ function(add_example_executable EXAMPLE_NAME FILE_NAME)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
if(INSTANCES_ONLY)
|
||||
set(EX_TARGETS ${DEFAULT_GPU_TARGETS})
|
||||
else()
|
||||
set(EX_TARGETS ${GPU_TARGETS})
|
||||
endif()
|
||||
set(EX_TARGETS ${SUPPORTED_GPU_TARGETS})
|
||||
|
||||
#Do not build any DL examples if DL_KERNELS not set
|
||||
foreach(source IN LISTS FILE_NAME)
|
||||
@@ -147,11 +143,8 @@ function(add_example_executable_no_testing EXAMPLE_NAME FILE_NAME)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
if(INSTANCES_ONLY)
|
||||
set(EX_TARGETS ${DEFAULT_GPU_TARGETS})
|
||||
else()
|
||||
set(EX_TARGETS ${GPU_TARGETS})
|
||||
endif()
|
||||
set(EX_TARGETS ${SUPPORTED_GPU_TARGETS})
|
||||
|
||||
#Do not build any DL examples if DL_KERNELS not set
|
||||
foreach(source IN LISTS FILE_NAME)
|
||||
if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl")
|
||||
|
||||
@@ -97,13 +97,6 @@
|
||||
#cmakedefine CK_ENABLE_DL_KERNELS @CK_ENABLE_DL_KERNELS@
|
||||
#endif
|
||||
|
||||
//
|
||||
// Instances supports in the current CK build
|
||||
//
|
||||
#ifndef CK_ENABLE_INSTANCES_ONLY
|
||||
#cmakedefine CK_ENABLE_INSTANCES_ONLY @CK_ENABLE_INSTANCES_ONLY@
|
||||
#endif
|
||||
|
||||
//
|
||||
// CK kernels which support XDL (MI series)
|
||||
//
|
||||
|
||||
@@ -37,11 +37,7 @@ function(add_instance_library INSTANCE_NAME)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
if(INSTANCES_ONLY)
|
||||
set(INST_TARGETS ${DEFAULT_GPU_TARGETS})
|
||||
else()
|
||||
set(INST_TARGETS ${GPU_TARGETS})
|
||||
endif()
|
||||
set(INST_TARGETS ${SUPPORTED_GPU_TARGETS})
|
||||
|
||||
# Do not build DL instances if DL_KERNELS macro is not set
|
||||
foreach(source IN LISTS ARGN)
|
||||
@@ -75,11 +71,7 @@ function(add_instance_library INSTANCE_NAME)
|
||||
if(ARGN)
|
||||
set(INST_OBJ)
|
||||
foreach(source IN LISTS ARGN)
|
||||
if(INSTANCES_ONLY)
|
||||
set(INST_TARGETS ${DEFAULT_GPU_TARGETS})
|
||||
else()
|
||||
set(INST_TARGETS ${GPU_TARGETS})
|
||||
endif()
|
||||
set(INST_TARGETS ${SUPPORTED_GPU_TARGETS})
|
||||
if(source MATCHES "_xdl")
|
||||
list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201)
|
||||
elseif(ARGN MATCHES "_wmma")
|
||||
@@ -191,12 +183,7 @@ FOREACH(subdir_path ${dir_list})
|
||||
set(add_inst 1)
|
||||
endif()
|
||||
|
||||
if(INSTANCES_ONLY)
|
||||
set(INST_TARGETS ${DEFAULT_GPU_TARGETS})
|
||||
else()
|
||||
set(INST_TARGETS ${GPU_TARGETS})
|
||||
endif()
|
||||
|
||||
set(INST_TARGETS ${SUPPORTED_GPU_TARGETS})
|
||||
|
||||
if(("${cmake_instance}" MATCHES "quantization") AND (DEFINED DTYPES) AND (NOT DTYPES MATCHES "int8"))
|
||||
message("quantization instances will not be built!")
|
||||
|
||||
@@ -24,7 +24,7 @@ set(PROFILER_SOURCES
|
||||
profile_permute_scale.cpp
|
||||
)
|
||||
|
||||
if(GPU_TARGETS MATCHES "gfx9")
|
||||
if(SUPPORTED_GPU_TARGETS MATCHES "gfx9")
|
||||
if(DTYPES MATCHES "fp32" OR DTYPES MATCHES "fp64" OR NOT DEFINED DTYPES)
|
||||
list(APPEND PROFILER_SOURCES profile_contraction_bilinear.cpp)
|
||||
list(APPEND PROFILER_SOURCES profile_contraction_scale.cpp)
|
||||
@@ -49,7 +49,7 @@ if(GPU_TARGETS MATCHES "gfx9")
|
||||
list(APPEND PROFILER_SOURCES profile_grouped_gemm_multiply_tile_loop.cpp)
|
||||
endif()
|
||||
list(APPEND PROFILER_SOURCES profile_gemm_multiply_add.cpp)
|
||||
if(GPU_TARGETS MATCHES "gfx94")
|
||||
if(SUPPORTED_GPU_TARGETS MATCHES "gfx94")
|
||||
list(APPEND PROFILER_SOURCES profile_gemm_multiply_multiply.cpp)
|
||||
list(APPEND PROFILER_SOURCES profile_gemm_ab_scale.cpp)
|
||||
endif()
|
||||
@@ -69,7 +69,7 @@ if(GPU_TARGETS MATCHES "gfx9")
|
||||
|
||||
endif()
|
||||
|
||||
if(GPU_TARGETS MATCHES "gfx11" OR GPU_TARGETS MATCHES "gfx12" OR GPU_TARGETS MATCHES "gfx9")
|
||||
if(SUPPORTED_GPU_TARGETS MATCHES "gfx11" OR SUPPORTED_GPU_TARGETS MATCHES "gfx12" OR SUPPORTED_GPU_TARGETS MATCHES "gfx9")
|
||||
if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
|
||||
list(APPEND PROFILER_SOURCES profile_gemm_bilinear.cpp)
|
||||
endif()
|
||||
@@ -111,7 +111,7 @@ target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_column_to_image_inst
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_transpose_instance)
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_permute_scale_instance)
|
||||
|
||||
if(GPU_TARGETS MATCHES "gfx9")
|
||||
if(SUPPORTED_GPU_TARGETS MATCHES "gfx9")
|
||||
if(DTYPES MATCHES "fp32" OR DTYPES MATCHES "fp64" OR NOT DEFINED DTYPES)
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_contraction_bilinear_instance)
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_contraction_scale_instance)
|
||||
@@ -135,7 +135,7 @@ if(GPU_TARGETS MATCHES "gfx9")
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_instance)
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_reduce_instance)
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_multiply_add_instance)
|
||||
if(GPU_TARGETS MATCHES "gfx94")
|
||||
if(SUPPORTED_GPU_TARGETS MATCHES "gfx94")
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_multiply_multiply_instance)
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_ab_scale_instance)
|
||||
endif()
|
||||
@@ -159,7 +159,7 @@ if(GPU_TARGETS MATCHES "gfx9")
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_fwd_convinvscale_instance)
|
||||
endif()
|
||||
|
||||
if(GPU_TARGETS MATCHES "gfx9" OR GPU_TARGETS MATCHES "gfx11" OR GPU_TARGETS MATCHES "gfx12")
|
||||
if(SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR SUPPORTED_GPU_TARGETS MATCHES "gfx11" OR SUPPORTED_GPU_TARGETS MATCHES "gfx12")
|
||||
if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_bilinear_instance)
|
||||
endif()
|
||||
|
||||
@@ -41,11 +41,7 @@ function(add_test_executable TEST_NAME)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
if(INSTANCES_ONLY)
|
||||
set(TEST_TARGETS ${DEFAULT_GPU_TARGETS})
|
||||
else()
|
||||
set(TEST_TARGETS ${GPU_TARGETS})
|
||||
endif()
|
||||
set(TEST_TARGETS ${SUPPORTED_GPU_TARGETS})
|
||||
|
||||
foreach(source IN LISTS ARGN)
|
||||
if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl")
|
||||
@@ -122,11 +118,7 @@ function(add_gtest_executable TEST_NAME)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
if(INSTANCES_ONLY)
|
||||
set(TEST_TARGETS ${DEFAULT_GPU_TARGETS})
|
||||
else()
|
||||
set(TEST_TARGETS ${GPU_TARGETS})
|
||||
endif()
|
||||
set(TEST_TARGETS ${SUPPORTED_GPU_TARGETS})
|
||||
|
||||
foreach(source IN LISTS ARGN)
|
||||
if(NOT DEFINED DL_KERNELS AND source MATCHES "_dl")
|
||||
@@ -211,10 +203,10 @@ add_subdirectory(conv_tensor_rearrange)
|
||||
add_subdirectory(transpose)
|
||||
add_subdirectory(permute_scale)
|
||||
add_subdirectory(wrapper)
|
||||
if(GPU_TARGETS MATCHES "gfx11")
|
||||
if(SUPPORTED_GPU_TARGETS MATCHES "gfx11")
|
||||
add_subdirectory(wmma_op)
|
||||
endif()
|
||||
if(GPU_TARGETS MATCHES "gfx942" AND CK_HIP_VERSION_MAJOR GREATER_EQUAL 6 AND CK_HIP_VERSION_MINOR GREATER_EQUAL 2) # smfmac needs ROCm6.2
|
||||
if(SUPPORTED_GPU_TARGETS MATCHES "gfx942" AND CK_HIP_VERSION_MAJOR GREATER_EQUAL 6 AND CK_HIP_VERSION_MINOR GREATER_EQUAL 2) # smfmac needs ROCm6.2
|
||||
add_subdirectory(smfmac_op)
|
||||
endif()
|
||||
add_subdirectory(position_embedding)
|
||||
|
||||
Reference in New Issue
Block a user