mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 21:51:28 +00:00
* Use same layout for o_acc and o tensor * Use better param names in partitioner * Remove redundant kargs 'max_seqlen_q' * Use better param names in splitkv kernel * Add comment for additional kernel arguments * Sync empty loop early return logics between pipelines * Pass more arguments to cmake in scripts * Align backslashes * Fix wrong o_acc tensor view strides * Change o_acc layout if o_perm=0 * Handle whole row masked via attn_bias * Use vector width = 1 for o_acc * Use more even split sizes
27 lines
1.3 KiB
Bash
Executable File
27 lines
1.3 KiB
Bash
Executable File
#!/bin/bash
# Configure a Release CMake build that compiles with /opt/rocm/bin/hipcc.
#
# Usage: $0 [source-dir [gpu-targets [extra cmake args...]]]
#   $1     project source directory, handed to cmake as its path argument
#   $2     value for -D GPU_TARGETS (default: "gfx908;gfx90a;gfx940")
#   $3...  additional arguments forwarded to cmake ahead of the source path

# Remove configuration output left behind by an earlier cmake run here.
rm -f CMakeCache.txt
rm -f -- *.cmake
rm -rf CMakeFiles

MY_PROJECT_SOURCE=$1

if [ $# -ge 2 ] ; then
    GPU_TARGETS=$2
    # Hold the extra arguments in an array so that each one reaches cmake as
    # exactly the word the caller passed, even if it contains spaces or globs.
    REST_ARGS=("${@:3}")
else
    GPU_TARGETS="gfx908;gfx90a;gfx940"
    REST_ARGS=()
fi

# The source path is expanded with ':+' so that it is dropped entirely when no
# argument was given, instead of being passed to cmake as an empty word.
cmake                                                                                                                                       \
    -D CMAKE_PREFIX_PATH=/opt/rocm                                                                                                          \
    -D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc                                                                                               \
    -D CMAKE_CXX_FLAGS="-Xclang -mllvm -Xclang -enable-post-misched=0 -std=c++17 -O3 -ftemplate-backtrace-limit=0 -fPIE -Wno-gnu-line-marker" \
    -D CMAKE_BUILD_TYPE=Release                                                                                                             \
    -D BUILD_DEV=ON                                                                                                                         \
    -D GPU_TARGETS="$GPU_TARGETS"                                                                                                           \
    -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON                                                                                                       \
    -D USE_BITINT_EXTENSION_INT4=OFF                                                                                                        \
    "${REST_ARGS[@]}"                                                                                                                       \
    ${MY_PROJECT_SOURCE:+"$MY_PROJECT_SOURCE"}