mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
[rocm-libraries] ROCm/rocm-libraries#4963 (commit cb6bbf6)
[CK][CK Tile] Fix batched GEMM kernel with 2 LDS

## Motivation

Fix the universal GEMM call made by the batched GEMM kernel in the 2-LDS case. Disable split-K when the atomic add instruction size is not valid.

## Technical Details

Fix the universal GEMM call made by the batched GEMM kernel in the 2-LDS case. Disable split-K when the atomic add instruction size is not valid.

## Test Plan

CI overall

## Test Result

Pending

## Submission Checklist

- [x] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
This commit is contained in:
committed by
assistant-librarian[bot]
parent
22de6a19d9
commit
c1ec24a2de
@@ -228,34 +228,9 @@ struct BatchedGemmKernel
|
||||
CDataType* c_ptr = static_cast<CDataType*>(kargs.e_ptr) + batch_offset_C;
|
||||
|
||||
// allocate LDS
|
||||
__shared__ char smem_ptr0[GetSmemSize()];
|
||||
|
||||
if constexpr(GemmPipeline::DoubleSmemBuffer == true)
|
||||
{
|
||||
__shared__ char smem_ptr1[GemmPipeline::GetSmemSize()];
|
||||
UniversalGemmKernel::RunGemm2LDS({a_ptr},
|
||||
{b_ptr},
|
||||
{/*ds_ptr*/},
|
||||
c_ptr,
|
||||
smem_ptr0,
|
||||
smem_ptr1,
|
||||
kargs,
|
||||
splitk_batch_offset,
|
||||
i_m,
|
||||
i_n);
|
||||
}
|
||||
else
|
||||
{
|
||||
UniversalGemmKernel::RunGemm({a_ptr},
|
||||
{b_ptr},
|
||||
{/*ds_ptr*/},
|
||||
c_ptr,
|
||||
smem_ptr0,
|
||||
kargs,
|
||||
splitk_batch_offset,
|
||||
i_m,
|
||||
i_n);
|
||||
}
|
||||
__shared__ char smem_ptr[GetSmemSize()];
|
||||
UniversalGemmKernel::RunGemm(
|
||||
{a_ptr}, {b_ptr}, {/*ds_ptr*/}, c_ptr, smem_ptr, kargs, splitk_batch_offset, i_m, i_n);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -1156,9 +1156,13 @@ struct UniversalGemmKernel
|
||||
}
|
||||
else
|
||||
{
|
||||
auto c_block_window = MakeCBlockWindows<memory_operation_enum::atomic_add>(
|
||||
e_ptr, kargs, block_idx_m, block_idx_n);
|
||||
EpiloguePipeline{}(c_block_window, c_block_tile, ds_block_window, smem_ptr);
|
||||
if constexpr(EpiloguePipeline::GetVectorSizeC() % 2 == 0 ||
|
||||
!is_any_of<EDataType, fp16_t, bf16_t>::value)
|
||||
{
|
||||
auto c_block_window = MakeCBlockWindows<memory_operation_enum::atomic_add>(
|
||||
e_ptr, kargs, block_idx_m, block_idx_n);
|
||||
EpiloguePipeline{}(c_block_window, c_block_tile, ds_block_window, smem_ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user