Fix grouped_gemm_splitk kernels on MI300. (#694)

* replace amd_buffer_atomic_add with hip_atomic_add

* fix grouped_gemm_splitk kernels on MI300

* fix syntax

* revert experimental atomic_add changes

---------

Co-authored-by: Jing Zhang <jizhan@amd.com>
This commit is contained in:
Illia Silin
2023-05-03 08:25:25 -07:00
committed by GitHub
parent 86e0190ec9
commit 4a51d2da9d
2 changed files with 3 additions and 2 deletions

View File

@@ -34,7 +34,8 @@ __global__ void
kernel_grouped_gemm_xdl_splitk(const void CK_CONSTANT_ADDRESS_SPACE* gemm_descs_const,
const index_t group_count)
{
-#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__))
+#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
+    defined(__gfx940__))
constexpr index_t shared_size = GridwiseGemm::GetSharedMemoryNumberOfByte();
__shared__ uint8_t p_shared[shared_size];