mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 05:01:25 +00:00
@@ -53,8 +53,7 @@ __global__ void
|
||||
const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch,
|
||||
const index_t num_k_per_block)
|
||||
{
|
||||
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
|
||||
defined(__gfx94__))
|
||||
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx9__))
|
||||
const index_t g_idx = __builtin_amdgcn_readfirstlane(blockIdx.z);
|
||||
const index_t k_idx = __builtin_amdgcn_readfirstlane(blockIdx.y * num_k_per_block);
|
||||
|
||||
@@ -82,7 +81,7 @@ __global__ void
|
||||
k_idx);
|
||||
#else
|
||||
ignore = karg;
|
||||
#endif // end of if (defined(__gfx908__) || defined(__gfx90a__))
|
||||
#endif // end of if (defined(__gfx9__))
|
||||
}
|
||||
|
||||
template <typename GridwiseGemm,
|
||||
@@ -107,8 +106,7 @@ __global__ void
|
||||
const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch,
|
||||
const index_t num_k_per_block)
|
||||
{
|
||||
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \
|
||||
defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
|
||||
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx9__))
|
||||
// offset base pointer for each work-group
|
||||
const index_t g_idx = __builtin_amdgcn_readfirstlane(blockIdx.z);
|
||||
const index_t k_idx = __builtin_amdgcn_readfirstlane(blockIdx.y * num_k_per_block);
|
||||
@@ -142,7 +140,7 @@ __global__ void
|
||||
k_idx);
|
||||
#else
|
||||
ignore = karg;
|
||||
#endif // end of if (defined(__gfx908__) || defined(__gfx90a__))
|
||||
#endif // end of if (defined(__gfx9__))
|
||||
}
|
||||
|
||||
// out[N, Ho, Wo, K] = in[N, Hi, Wi, C] * wei[K, Y, X, C]
|
||||
|
||||
Reference in New Issue
Block a user