diff --git a/.gitignore b/.gitignore
index 294863ce8a..cdf5b64dec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,4 +45,4 @@ build*
*~
# GDB temporary files
-.gdb_history
\ No newline at end of file
+.gdb_history
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 39d2401fc7..1d2f57be30 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,7 +7,8 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
enable_testing()
-find_package(ROCM REQUIRED PATHS /opt/rocm)
+set(ROCM_SYMLINK_LIBS OFF)
+find_package(ROCM 0.8 REQUIRED PATHS /opt/rocm)
include(ROCMInstallTargets)
include(ROCMPackageConfigHelpers)
@@ -16,7 +17,7 @@ include(ROCMInstallSymlinks)
include(ROCMCreatePackage)
include(CheckCXXCompilerFlag)
-rocm_setup_version(VERSION 1.0.0)
+rocm_setup_version(VERSION 0.2.0)
include(TargetFlags)
list(APPEND CMAKE_PREFIX_PATH ${CMAKE_INSTALL_PREFIX} ${CMAKE_INSTALL_PREFIX}/llvm ${CMAKE_INSTALL_PREFIX}/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip)
@@ -70,7 +71,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
endif()
message(STATUS "Build with HIP ${HIP_VERSION}")
-
rocm_create_package(
NAME composablekernel
DESCRIPTION "High Performance Composable Kernel for AMD GPUs"
@@ -238,6 +238,11 @@ message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C ${CMAKE_CFG_INTDIR})
+rocm_package_setup_component(tests
+ LIBRARY_NAME composablekernel
+ PACKAGE_NAME tests # Prevent -static suffix on package name
+)
+
add_subdirectory(library)
add_subdirectory(example)
add_subdirectory(test)
@@ -259,8 +264,19 @@ configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in
NO_CHECK_REQUIRED_COMPONENTS_MACRO
)
-install(FILES
+rocm_install(FILES
"${CMAKE_CURRENT_BINARY_DIR}/composable_kernelConfig.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/composable_kernelConfigVersion.cmake"
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
)
+
+set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
+set(CPACK_RPM_PACKAGE_LICENSE "MIT")
+
+rocm_create_package(
+ NAME composablekernel
+ DESCRIPTION "High Performance Composable Kernel for AMD GPUs"
+ MAINTAINER "MIOpen Kernels Dev Team <dl.MIOpen@amd.com>"
+ LDCONFIG
+ HEADER_ONLY
+)
diff --git a/Dockerfile b/Dockerfile
index 79c961144a..0d32b52f75 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -88,3 +88,8 @@ ADD rbuild.ini /rbuild.ini
ADD dev-requirements.txt dev-requirements.txt
RUN rbuild prepare -s develop -d $PREFIX
RUN groupadd -f render
+
+# Install the new rocm-cmake version (shallow clone; the checkout is removed in the same layer to keep the image small)
+RUN git clone --depth 1 -b master https://github.com/RadeonOpenCompute/rocm-cmake.git && \
+    cd rocm-cmake && mkdir build && cd build && \
+    cmake .. && cmake --build . && cmake --build . --target install && cd ../.. && rm -rf rocm-cmake
diff --git a/README.md b/README.md
index f6c933bf5b..5f9f95859b 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,9 @@ rocm/tensorflow:rocm5.1-tf2.6-dev \
/bin/bash
```
+## Install the new rocm-cmake version
+Install rocm-cmake 0.8 or newer from https://github.com/RadeonOpenCompute/rocm-cmake before configuring the build.
+
## Build
```bash
mkdir build && cd build
@@ -34,7 +37,7 @@ Instructions for running each individual examples are under ```example/```
## Tests
```bash
- make -j tests
+ make -j examples tests
make test
```
diff --git a/cmake/googletest.cmake b/cmake/googletest.cmake
index 959bc4f4b0..3718b916ff 100644
--- a/cmake/googletest.cmake
+++ b/cmake/googletest.cmake
@@ -8,7 +8,7 @@ endif()
message(STATUS "Fetching GoogleTest")
-list(APPEND GTEST_CMAKE_CXX_FLAGS
+list(APPEND GTEST_CMAKE_CXX_FLAGS
-Wno-undef
-Wno-reserved-identifier
-Wno-global-constructors
@@ -31,7 +31,11 @@ FetchContent_Declare(
# Will be necessary for windows build
# set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
-FetchContent_MakeAvailable(googletest)
+FetchContent_GetProperties(googletest)
+if(NOT googletest_POPULATED)
+ FetchContent_Populate(googletest)
+ add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL)
+endif()
target_compile_options(gtest PRIVATE ${GTEST_CMAKE_CXX_FLAGS})
target_compile_options(gtest_main PRIVATE ${GTEST_CMAKE_CXX_FLAGS})
diff --git a/library/src/host_tensor/CMakeLists.txt b/library/src/host_tensor/CMakeLists.txt
index ae3ecf2eed..eca22c6091 100644
--- a/library/src/host_tensor/CMakeLists.txt
+++ b/library/src/host_tensor/CMakeLists.txt
@@ -11,22 +11,20 @@ target_compile_features(host_tensor PUBLIC)
set_target_properties(host_tensor PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(host_tensor SYSTEM PUBLIC $)
-target_include_directories(host_tensor PUBLIC
+target_include_directories(host_tensor PUBLIC
"$"
"$"
"$"
)
-install(TARGETS host_tensor
- EXPORT host_tensorTargets
- LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
- ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
- RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
- INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+rocm_install(
+ TARGETS host_tensor
+ EXPORT host_tensorTargets
)
-install(EXPORT host_tensorTargets
- FILE composable_kernelhost_tensorTargets.cmake
+rocm_install(
+ EXPORT host_tensorTargets
+ FILE composable_kernelhost_tensorTargets.cmake
NAMESPACE composable_kernel::
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
)
diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
index c50b3ef649..73236b856b 100644
--- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt
+++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
@@ -75,21 +75,17 @@ target_include_directories(device_operations PUBLIC
#once new arches are enabled make this an option on the main cmake file
# and pass down here to be exported
-target_compile_options(device_operations PRIVATE
+target_compile_options(device_operations PRIVATE
--offload-arch=gfx908
--offload-arch=gfx90a
)
# install(TARGETS device_operations LIBRARY DESTINATION lib)
-install(TARGETS device_operations
- EXPORT device_operationsTargets
- LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
- ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
- RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
- INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
-)
-install(DIRECTORY ${DEV_OPS_INC_DIRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/ck)
-install(EXPORT device_operationsTargets
+rocm_install(TARGETS device_operations
+ EXPORT device_operationsTargets)
+
+rocm_install(DIRECTORY ${DEV_OPS_INC_DIRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/ck)
+rocm_install(EXPORT device_operationsTargets
FILE composable_kerneldevice_operationsTargets.cmake
NAMESPACE composable_kernel::
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_weight/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_bwd_weight/CMakeLists.txt
index 7c384a882b..7d3c57b235 100644
--- a/library/src/tensor_operation_instance/gpu/conv2d_bwd_weight/CMakeLists.txt
+++ b/library/src/tensor_operation_instance/gpu/conv2d_bwd_weight/CMakeLists.txt
@@ -3,9 +3,9 @@ set(DEVICE_CONV2D_BWD_WEIGHT_INSTANCE_SOURCE
device_conv2d_bwd_weight_xdl_nhwc_kyxc_nhwk_f16_instance.cpp;
device_conv2d_bwd_weight_xdl_nhwc_kyxc_nhwk_f32_instance.cpp;
)
-add_library(device_conv2d_bwd_weight_instance OBJECT ${DEVICE_CONV2D_BWD_WEIGHT_INSTANCE_SOURCE})
+add_library(device_conv2d_bwd_weight_instance OBJECT ${DEVICE_CONV2D_BWD_WEIGHT_INSTANCE_SOURCE})
target_compile_features(device_conv2d_bwd_weight_instance PUBLIC)
set_target_properties(device_conv2d_bwd_weight_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
-install(TARGETS device_conv2d_bwd_weight_instance LIBRARY DESTINATION lib)
+rocm_install(TARGETS device_conv2d_bwd_weight_instance)
clang_tidy_check(device_conv2d_bwd_weight_instance)
diff --git a/library/src/tensor_operation_instance/gpu/convnd_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/convnd_bwd_data/CMakeLists.txt
index 037f860808..dae633b7da 100644
--- a/library/src/tensor_operation_instance/gpu/convnd_bwd_data/CMakeLists.txt
+++ b/library/src/tensor_operation_instance/gpu/convnd_bwd_data/CMakeLists.txt
@@ -1,5 +1,5 @@
# device_convnd_bwd_data_instance
-set(DEVICE_CONVND_BWD_DATA_INSTANCE_SOURCE
+set(DEVICE_CONVND_BWD_DATA_INSTANCE_SOURCE
device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp;
device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp;
device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp;
@@ -12,11 +12,11 @@ set(DEVICE_CONVND_BWD_DATA_INSTANCE_SOURCE
device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp;
device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp;
device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp;
-)
+)
add_library(device_convnd_bwd_data_instance OBJECT ${DEVICE_CONVND_BWD_DATA_INSTANCE_SOURCE})
target_compile_features(device_convnd_bwd_data_instance PUBLIC)
set_target_properties(device_convnd_bwd_data_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
-install(TARGETS device_convnd_bwd_data_instance LIBRARY DESTINATION lib)
+rocm_install(TARGETS device_convnd_bwd_data_instance)
clang_tidy_check(device_convnd_bwd_data_instance)
diff --git a/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt
index 0d068646af..aec16bcf77 100644
--- a/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt
+++ b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt
@@ -6,5 +6,5 @@ set(DEVICE_GEMM_REDUCE_INSTANCE_SOURCE
)
add_instance_library(device_gemm_bias_add_reduce_instance ${DEVICE_GEMM_REDUCE_INSTANCE_SOURCE})
-install(TARGETS device_gemm_bias_add_reduce_instance LIBRARY DESTINATION lib)
+rocm_install(TARGETS device_gemm_bias_add_reduce_instance)
clang_tidy_check(device_gemm_bias_add_reduce_instance)
diff --git a/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt
index 5bc6d17a93..5fbdc28d7b 100644
--- a/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt
+++ b/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt
@@ -6,5 +6,5 @@ set(DEVICE_GEMM_REDUCE_INSTANCE_SOURCE
)
add_instance_library(device_gemm_reduce_instance ${DEVICE_GEMM_REDUCE_INSTANCE_SOURCE})
-install(TARGETS device_gemm_reduce_instance LIBRARY DESTINATION lib)
+rocm_install(TARGETS device_gemm_reduce_instance)
clang_tidy_check(device_gemm_reduce_instance)
diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt
index 6c5e31fddd..4d1115ceb6 100644
--- a/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt
+++ b/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt
@@ -6,10 +6,10 @@ set(DEVICE_GROUPED_GEMM_INSTANCE_SOURCE
device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp;
)
-add_library(device_grouped_gemm_instance OBJECT ${DEVICE_GROUPED_GEMM_INSTANCE_SOURCE})
+add_library(device_grouped_gemm_instance OBJECT ${DEVICE_GROUPED_GEMM_INSTANCE_SOURCE})
target_compile_features(device_grouped_gemm_instance PUBLIC)
set_target_properties(device_grouped_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON)
-install(TARGETS device_grouped_gemm_instance LIBRARY DESTINATION lib)
+rocm_install(TARGETS device_grouped_gemm_instance)
clang_tidy_check(device_grouped_gemm_instance)
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 47c13d33e0..f8b07487d9 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -13,6 +13,7 @@ function(add_test_executable TEST_NAME)
add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}> )
add_dependencies(tests ${TEST_NAME})
add_dependencies(check ${TEST_NAME})
+ rocm_install(TARGETS ${TEST_NAME} COMPONENT tests)
endfunction(add_test_executable TEST_NAME)
include(GoogleTest)
@@ -26,6 +27,7 @@ function(add_gtest_executable TEST_NAME)
target_compile_options(${TEST_NAME} PRIVATE -Wno-global-constructors -Wno-undef)
target_link_libraries(${TEST_NAME} PRIVATE gtest_main)
gtest_discover_tests(${TEST_NAME})
+ rocm_install(TARGETS ${TEST_NAME} COMPONENT tests)
endfunction(add_gtest_executable TEST_NAME)