Using number of compute units to set gridSize (#754)

* Add getAvailableComputeUnitCount() interface

* Use available number of compute units to set kernel grid size
This commit is contained in:
Qianfeng
2023-06-15 23:13:59 +08:00
committed by GitHub
parent d1838d328c
commit c5f6ec842c
3 changed files with 97 additions and 58 deletions

View File

@@ -0,0 +1,43 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <hip/hip_runtime.h>
#include "ck/stream_config.hpp"
#include "ck/host_utility/hip_check_error.hpp"
static int getAvailableComputeUnitCount(const StreamConfig& stream_config)
{
constexpr int MAX_MASK_DWORDS = 64;
// assume at most 64*32 = 2048 CUs
uint32_t cuMask[MAX_MASK_DWORDS];
for(int i = 0; i < MAX_MASK_DWORDS; i++)
cuMask[i] = 0;
auto countSetBits = [](uint32_t dword) {
int count = 0;
while(dword != 0)
{
if(dword & 0x1)
count++;
dword = dword >> 1;
};
return (count);
};
hip_check_error(hipExtStreamGetCUMask(stream_config.stream_id_, MAX_MASK_DWORDS, &cuMask[0]));
int ret = 0;
for(int i = 0; i < MAX_MASK_DWORDS; i++)
ret += countSetBits(cuMask[i]);
return (ret);
};