Do not use warpSize as a compile-time constant, as it has been removed

This commit is contained in:
Satyanvesh Dittakavi
2025-06-10 04:11:14 +00:00
committed by Rakesh Roy
parent 4c23fa3caa
commit 1b042bb70d
3 changed files with 9 additions and 3 deletions

View File

@@ -245,6 +245,12 @@ CK_DECLARE_ENV_VAR_BOOL(CK_LOGGING)
namespace ck {
+#if defined(__GFX9__)
+__device__ static constexpr int WarpSize = 64;
+#else
+__device__ static constexpr int WarpSize = 32;
+#endif
enum struct InMemoryDataOperationEnum
{
Set,

View File

@@ -141,7 +141,7 @@ struct BlockwiseGemmXdlops_pipeline_v2<BlockGemmPipelineScheduler::Intrawave,
using Base::BMmaKStride;
static constexpr index_t WgpPerCU =
-(4 * warpSize / BlockSize) >= 1 ? 4 * warpSize / BlockSize : 1;
+(4 * WarpSize / BlockSize) >= 1 ? 4 * WarpSize / BlockSize : 1;
static constexpr index_t FullMemBandPrefetchStages = math::integer_divide_ceil(
32768 / WgpPerCU,
(MPerBlock * sizeof(ADataType) + NPerBlock * sizeof(BDataType)) * KPerBlock);
@@ -634,7 +634,7 @@ struct BlockwiseGemmXdlops_pipeline_v2<BlockGemmPipelineScheduler::Interwave,
static constexpr index_t KRepeat = KPerThread / KPerInnerLoop;
static constexpr index_t WgpPerCU =
-(4 * warpSize / BlockSize) >= 1 ? 4 * warpSize / BlockSize : 1;
+(4 * WarpSize / BlockSize) >= 1 ? 4 * WarpSize / BlockSize : 1;
static constexpr index_t FullMemBandPrefetchStages = math::integer_divide_ceil(
32768 / WgpPerCU,
(MPerBlock * sizeof(ADataType) + NPerBlock * sizeof(BDataType)) * KPerBlock);

View File

@@ -32,7 +32,7 @@ static __device__ void gms_init(int NumWarps, int* p_control_bits)
// all the workgroups in the synchronization group is supposed to call this function
static __device__ void gms_barrier(int* p_control_bits)
{
-constexpr int mask = warpSize - 1;
+constexpr int mask = WarpSize - 1;
if((threadIdx.x & mask) == 0)
{