mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-17 11:30:02 +00:00
Code clean up (#20)
* tuning para,
* testing on v100
* add fp16
* remove deprecated tensor descriptor
* sync with miopen
* update build script
Co-authored-by: Jing Zhang <jizhan@amd.com>
[ROCm/composable_kernel commit: 5c7cec1115]
This commit is contained in:
@@ -1,24 +1,20 @@
|
||||
#!/bin/bash
|
||||
|
||||
rm -f CMakeCache.txt
|
||||
rm -f *.cmake
|
||||
rm -rf CMakeFiles
|
||||
|
||||
MY_PROJECT_SOURCE=/home/chao/code/modular_convolution
|
||||
MY_PROJECT_INSTALL=../install.dir
|
||||
MY_PROJECT_SOURCE=../../../
|
||||
|
||||
export CUDA_ROOT=/usr/local/cuda
|
||||
export CPATH=$CPATH:$CUDA_ROOT/include
|
||||
export LIBRARY_PATH=$LIBRARY_PATH:$CUDA_ROOT/lib64
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CUDA_ROOT/lib64
|
||||
|
||||
cmake \
|
||||
-D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \
|
||||
-D CMAKE_CXX_COMPILER=clang++ \
|
||||
-D CMAKE_BUILD_TYPE=Release \
|
||||
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
|
||||
-D DEVICE_BACKEND=NVIDIA \
|
||||
-D CUDA_COMMON_INCLUDE_DIR="/package/install/cuda/10.1/NVIDIA_CUDA-10.1_Samples/common/inc" \
|
||||
-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61 -Xptxas -v -maxrregcount=128" \
|
||||
${MY_PROJECT_SOURCE}
|
||||
|
||||
#-D BOOST_ROOT="/package/install/boost_1.67.0" \
|
||||
|
||||
#-D CMAKE_CUDA_COMPILER="/package/install/cuda_10.0/bin/nvcc" \
|
||||
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61" \
|
||||
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61 -Xptxas -v -maxrregcount=128" \
|
||||
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_70,code=sm_70" \
|
||||
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_70,code=sm_70 -Xptxas -v -maxrregcount=128" \
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
MY_PROJECT_SOURCE=../../../
|
||||
MY_PROJECT_INSTALL=../install.dir
|
||||
|
||||
export CUDA_ROOT=/usr/local/cuda
|
||||
export CPATH=$CPATH:$CUDA_ROOT/include
|
||||
export LIBRARY_PATH=$LIBRARY_PATH:$CUDA_ROOT/lib64
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CUDA_ROOT/lib64
|
||||
|
||||
cmake \
|
||||
-D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \
|
||||
-D CMAKE_CXX_COMPILER=clang++-6.0 \
|
||||
-D CMAKE_BUILD_TYPE=Release \
|
||||
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
|
||||
-D DEVICE_BACKEND=NVIDIA \
|
||||
-D CUDA_COMMON_INCLUDE_DIR="/root/NVIDIA_CUDA-10.1_Samples/common/inc" \
|
||||
-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61 -Xptxas -v -maxrregcount=128" \
|
||||
${MY_PROJECT_SOURCE}
|
||||
|
||||
|
||||
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61" \
|
||||
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61 -Xptxas -v -maxrregcount=128" \
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/bin/bash
|
||||
|
||||
rm -f CMakeCache.txt
|
||||
rm -f *.cmake
|
||||
rm -rf CMakeFiles
|
||||
@@ -11,9 +10,10 @@ cmake
|
||||
-D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \
|
||||
-D CMAKE_BUILD_TYPE=Release \
|
||||
-D DEVICE_BACKEND="AMD" \
|
||||
-D HIP_HIPCC_FLAGS="${HIP_HIPCC_FLAGS} -gline-tables-only -v" \
|
||||
-D CMAKE_CXX_FLAGS="-gline-tables-only --amdgpu-target=gfx906" \
|
||||
-D CMAKE_CXX_FLAGS="--amdgpu-target=gfx906" \
|
||||
-D CMAKE_CXX_COMPILER=/opt/rocm/hip/bin/hipcc \
|
||||
-D CMAKE_PREFIX_PATH="/opt/rocm" \
|
||||
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
|
||||
${MY_PROJECT_SOURCE}
|
||||
|
||||
#-D CMAKE_CXX_FLAGS="-gline-tables-only -v --amdgpu-target=gfx906" \
|
||||
21
script/cmake-rocm3.5.sh
Executable file
21
script/cmake-rocm3.5.sh
Executable file
@@ -0,0 +1,21 @@
|
||||
#!/bin/bash
|
||||
rm -f CMakeCache.txt
|
||||
rm -f *.cmake
|
||||
rm -rf CMakeFiles
|
||||
|
||||
MY_PROJECT_SOURCE=../../../
|
||||
MY_PROJECT_INSTALL=../install.dir
|
||||
|
||||
cmake \
|
||||
-D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \
|
||||
-D CMAKE_BUILD_TYPE=Release \
|
||||
-D DEVICE_BACKEND="AMD" \
|
||||
-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0" \
|
||||
-D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
|
||||
-D CMAKE_PREFIX_PATH="/opt/rocm" \
|
||||
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
|
||||
${MY_PROJECT_SOURCE}
|
||||
|
||||
#-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0" \
|
||||
#-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0 -save-temps" \
|
||||
#-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0 -v -gline-tables-only -save-temps" \
|
||||
@@ -1,3 +0,0 @@
|
||||
WORKSPACE=$1
|
||||
echo "workspace: " $WORKSPACE
|
||||
sudo docker run -it -v $WORKSPACE:/root/workspace --group-add sudo --runtime=nvidia asroy/cuda:10.1-cudnn7-devel-ubuntu18.04-latest /bin/bash
|
||||
@@ -1,9 +0,0 @@
|
||||
# step 1: GET ISA DUMP
|
||||
#cd /root/workspace/mlopen/modular_convolution/build/hipcc/build.dir/driver && KMDUMPISA=1 /opt/rocm/hip/bin/hipcc -I/root/workspace/mlopen/modular_convolution/build/hipcc/build.dir/composable_kernel/include/utility -I/root/workspace/mlopen/modular_convolution/driver/include -I/root/workspace/mlopen/modular_convolution/composable_kernel/include/kernel_algorithm -I/root/workspace/mlopen/modular_convolution/composable_kernel/include/tensor_operation -I/root/workspace/mlopen/modular_convolution/composable_kernel/include/tensor_description -I/root/workspace/mlopen/modular_convolution/composable_kernel/include/utility -I/root/workspace/mlopen/modular_convolution/composable_kernel/include -gline-tables-only --amdgpu-target=gfx906 -fopenmp=libomp -O3 -DNDEBUG -std=c++14 -o CMakeFiles/driver.dir/src/driver.cpp.o -c /root/workspace/mlopen/modular_convolution/driver/src/driver.cpp -fno-gpu-rdc
|
||||
|
||||
# step 2: HACK ISA
|
||||
#cd /root/workspace/mlopen/modular_convolution/build/hipcc/build.dir/driver && KMHACKISA=1 /opt/rocm/hip/bin/hipcc -I/root/workspace/mlopen/modular_convolution/build/hipcc/build.dir/composable_kernel/include/utility -I/root/workspace/mlopen/modular_convolution/driver/include -I/root/workspace/mlopen/modular_convolution/composable_kernel/include/kernel_algorithm -I/root/workspace/mlopen/modular_convolution/composable_kernel/include/tensor_operation -I/root/workspace/mlopen/modular_convolution/composable_kernel/include/tensor_description -I/root/workspace/mlopen/modular_convolution/composable_kernel/include/utility -I/root/workspace/mlopen/modular_convolution/composable_kernel/include -gline-tables-only --amdgpu-target=gfx906 -fopenmp=libomp -O3 -DNDEBUG -std=c++14 -o CMakeFiles/driver.dir/src/driver.cpp.o -c /root/workspace/mlopen/modular_convolution/driver/src/driver.cpp -fno-gpu-rdc
|
||||
|
||||
# step 3: LINK
|
||||
#/opt/rocm/hip/bin/hipcc -gline-tables-only --amdgpu-target=gfx906 -fopenmp=libomp -O3 -DNDEBUG CMakeFiles/driver.dir/src/driver.cpp.o -o driver -rdynamic libhost.so -Wl,-rpath,/root/workspace/mlopen/modular_convolution/build/hipcc/build.dir/driver
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
/root/workspace/rocprofiler_pkg/bin/rpl_run.sh --timestamp on -i /root/workspace/rocprofiler_pkg/input.xml -d ./trace ./driver/driver 0 10
|
||||
@@ -1,3 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
/root/workspace/rocprofiler_pkg/bin/rpl_run.sh --timestamp on -i /root/workspace/rocprofiler_pkg/input.xml -d ./trace ./driver/driver 0 10
|
||||
Reference in New Issue
Block a user