[rocm-libraries] ROCm/rocm-libraries#4963 (commit cb6bbf6)

[CK][CK Tile] Fix batched gemm kernel 2 lds

## Motivation

Fix the batched GEMM kernel's call into the universal GEMM kernel for
pipelines that use two LDS buffers. Disable split-K when the atomic-add
instruction size is not valid.

## Technical Details

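Fix the batched GEMM kernel's call into the universal GEMM kernel for
pipelines that use two LDS buffers: the kernel now declares a single
`smem_ptr` buffer and always calls `UniversalGemmKernel::RunGemm`,
instead of branching to `RunGemm2LDS` when
`GemmPipeline::DoubleSmemBuffer` is set. Disable split-K when the
atomic-add instruction size is not valid: the `atomic_add` epilogue is
only compiled in when `EpiloguePipeline::GetVectorSizeC()` is even or
`EDataType` is neither `fp16_t` nor `bf16_t`.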

## Test Plan

CI overall

## Test Result

pending

## Submission Checklist

- [x] Look over the contributing guidelines at
https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
This commit is contained in:
Bartłomiej Kocot
2026-02-27 22:17:07 +00:00
committed by assistant-librarian[bot]
parent 22de6a19d9
commit c1ec24a2de
2 changed files with 10 additions and 31 deletions

View File

@@ -228,34 +228,9 @@ struct BatchedGemmKernel
CDataType* c_ptr = static_cast<CDataType*>(kargs.e_ptr) + batch_offset_C;
// allocate LDS
__shared__ char smem_ptr0[GetSmemSize()];
if constexpr(GemmPipeline::DoubleSmemBuffer == true)
{
__shared__ char smem_ptr1[GemmPipeline::GetSmemSize()];
UniversalGemmKernel::RunGemm2LDS({a_ptr},
{b_ptr},
{/*ds_ptr*/},
c_ptr,
smem_ptr0,
smem_ptr1,
kargs,
splitk_batch_offset,
i_m,
i_n);
}
else
{
UniversalGemmKernel::RunGemm({a_ptr},
{b_ptr},
{/*ds_ptr*/},
c_ptr,
smem_ptr0,
kargs,
splitk_batch_offset,
i_m,
i_n);
}
__shared__ char smem_ptr[GetSmemSize()];
UniversalGemmKernel::RunGemm(
{a_ptr}, {b_ptr}, {/*ds_ptr*/}, c_ptr, smem_ptr, kargs, splitk_batch_offset, i_m, i_n);
}
};

View File

@@ -1156,9 +1156,13 @@ struct UniversalGemmKernel
}
else
{
auto c_block_window = MakeCBlockWindows<memory_operation_enum::atomic_add>(
e_ptr, kargs, block_idx_m, block_idx_n);
EpiloguePipeline{}(c_block_window, c_block_tile, ds_block_window, smem_ptr);
if constexpr(EpiloguePipeline::GetVectorSizeC() % 2 == 0 ||
!is_any_of<EDataType, fp16_t, bf16_t>::value)
{
auto c_block_window = MakeCBlockWindows<memory_operation_enum::atomic_add>(
e_ptr, kargs, block_idx_m, block_idx_n);
EpiloguePipeline{}(c_block_window, c_block_tile, ds_block_window, smem_ptr);
}
}
}