mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-18 20:09:25 +00:00
Add bias scalar vectorload = 1 for gemm bias gemm (#791)
* first change bias load
* add bias dim and scalar vector parameter
* make CDE0BlockTransferSrcVectorDim not work
* changes to instance
* add limit for CDE0BlockTransferSrcScalarPerVector
[ROCm/composable_kernel commit: 50643dd555]
This commit is contained in:
@@ -196,6 +196,8 @@ template <typename A0Layout,
|
||||
index_t B0BlockTransferSrcScalarPerVector,
|
||||
index_t B0BlockTransferDstScalarPerVector_BK1,
|
||||
bool B0BlockLdsExtraN,
|
||||
index_t CDE0BlockTransferSrcVectorDim,
|
||||
index_t CDE0BlockTransferSrcScalaerPerVector,
|
||||
typename B1BlockTransferThreadClusterLengths_BK0_N_BK1,
|
||||
typename B1BlockTransferThreadClusterArrangeOrder,
|
||||
typename B1BlockTransferSrcAccessOrder,
|
||||
@@ -492,6 +494,8 @@ struct DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle
|
||||
B0BlockTransferDstScalarPerVector_BK1,
|
||||
true,
|
||||
B0BlockLdsExtraN,
|
||||
CDE0BlockTransferSrcVectorDim,
|
||||
CDE0BlockTransferSrcScalaerPerVector,
|
||||
B1BlockTransferThreadClusterLengths_BK0_N_BK1,
|
||||
B1BlockTransferThreadClusterArrangeOrder,
|
||||
B1BlockTransferSrcAccessOrder,
|
||||
|
||||
Reference in New Issue
Block a user