mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 14:59:17 +00:00
fix(grouped_gemm): pipeline selection when tail_num varies per group and leads to numerical error (#2863)
* fix(grouped_gemm): numerical errors on gfx950 by correctly calculating the tail num * WIP: add temp config to stress test numerical error correction * refactor: remove comments
This commit is contained in:
@@ -292,34 +292,8 @@ struct GroupedGemmKernel
|
||||
{
|
||||
|
||||
__shared__ char smem_ptr_1[GetSmemSize()];
|
||||
if constexpr(UsePersistentKernel || GemmPipeline::Preshuffle)
|
||||
{
|
||||
|
||||
RunGemmWithPipelineSelection2LDS(a_ptr,
|
||||
b_ptr,
|
||||
c_ptr,
|
||||
smem_ptr_0,
|
||||
smem_ptr_1,
|
||||
kargs,
|
||||
splitk_batch_offset,
|
||||
i_m,
|
||||
i_n);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
Base::RunGemm2LDS({a_ptr},
|
||||
{b_ptr},
|
||||
{/*ds_ptr*/},
|
||||
c_ptr,
|
||||
smem_ptr_0,
|
||||
smem_ptr_1,
|
||||
kargs,
|
||||
splitk_batch_offset,
|
||||
i_m,
|
||||
i_n);
|
||||
}
|
||||
RunGemmWithPipelineSelection2LDS(
|
||||
a_ptr, b_ptr, c_ptr, smem_ptr_0, smem_ptr_1, kargs, splitk_batch_offset, i_m, i_n);
|
||||
}
|
||||
else // SingleSmemBuffer
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user