mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-18 20:09:25 +00:00
* tuning parameters
* testing on v100
* add fp16
* remove deprecated tensor descriptor
* sync with miopen
* update build script
Co-authored-by: Jing Zhang <jizhan@amd.com>
[ROCm/composable_kernel commit: 5c7cec1115]
22 lines
1.6 KiB
Bash
Executable File
22 lines
1.6 KiB
Bash
Executable File
#!/bin/bash
#
# Configure a Release build of the project with CMake, compiling with
# /opt/rocm/bin/hipcc for the "AMD" device backend (gfx906 target).
#
# The script works on the current working directory: CMake state left there
# by an earlier configure run is deleted first, then cmake is invoked with the
# project source at ../../../ and the install prefix at ../install.dir.
#
# Usage: run from the build directory; takes no arguments.
# Exit status: non-zero if the cleanup or the cmake invocation fails.

# Stop on the first failing command or unset variable instead of configuring
# on top of a half-cleaned directory.
set -euo pipefail

# Remove the previous configuration so cached values cannot carry over.
# `--` and the `./` prefix keep odd file names from being parsed as options.
rm -f -- CMakeCache.txt
rm -f -- ./*.cmake
rm -rf -- CMakeFiles

# Both paths are relative to the directory the script is run from.
MY_PROJECT_SOURCE=../../../
MY_PROJECT_INSTALL=../install.dir

cmake \
  -D CMAKE_INSTALL_PREFIX="${MY_PROJECT_INSTALL}" \
  -D CMAKE_BUILD_TYPE=Release \
  -D DEVICE_BACKEND="AMD" \
  -D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0" \
  -D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
  -D CMAKE_PREFIX_PATH="/opt/rocm" \
  -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
  "${MY_PROJECT_SOURCE}"

# Alternative CMAKE_CXX_FLAGS lines kept for reference; to use one, replace
# the CMAKE_CXX_FLAGS line in the cmake invocation above with it.
#-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0" \
#-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0 -save-temps" \
#-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0 -v -gline-tables-only -save-temps" \