mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 10:09:41 +00:00
[CK_TILE] Change output accum tensor layout of fmha fwd split-kv & combine kernels (#1527)
* Use same layout for o_acc and o tensor
* Use better param names in partitioner
* Remove redundant kargs 'max_seqlen_q'
* Use better param names in splitkv kernel
* Add comment for additional kernel arguments
* Sync empty loop early return logic between pipelines
* Pass more arguments to cmake in scripts
* Align backslashes
* Fix wrong o_acc tensor view strides
* Change o_acc layout if o_perm=0
* Handle whole row masked via attn_bias
* Use vector width = 1 for o_acc
* Use more even split sizes
[ROCm/composable_kernel commit: a1c07e8d91]
This commit is contained in:
@@ -7,8 +7,10 @@ MY_PROJECT_SOURCE=$1
|
||||
|
||||
if [ $# -ge 2 ] ; then
|
||||
GPU_TARGETS=$2
|
||||
REST_ARGS=${@:3}
|
||||
else
|
||||
GPU_TARGETS="gfx908;gfx90a;gfx940"
|
||||
REST_ARGS=
|
||||
fi
|
||||
|
||||
cmake \
|
||||
@@ -20,4 +22,5 @@ cmake
|
||||
-D GPU_TARGETS=$GPU_TARGETS \
|
||||
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
|
||||
-D USE_BITINT_EXTENSION_INT4=OFF \
|
||||
$REST_ARGS \
|
||||
${MY_PROJECT_SOURCE}
|
||||
|
||||
@@ -7,8 +7,10 @@ MY_PROJECT_SOURCE=$1
|
||||
|
||||
if [ $# -ge 2 ] ; then
|
||||
GPU_TARGETS=$2
|
||||
REST_ARGS=${@:3}
|
||||
else
|
||||
GPU_TARGETS="gfx908;gfx90a;gfx940"
|
||||
REST_ARGS=
|
||||
fi
|
||||
|
||||
cmake \
|
||||
@@ -20,5 +22,6 @@ cmake
|
||||
-D GPU_TARGETS=$GPU_TARGETS \
|
||||
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
|
||||
-D USE_BITINT_EXTENSION_INT4=OFF \
|
||||
$REST_ARGS \
|
||||
${MY_PROJECT_SOURCE}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user