mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 14:59:17 +00:00
Using number of compute units to set gridSize (#754)
* Add getAvailableComputeUnitCount() interface * Use available number of compute units to set kernel grid size
This commit is contained in:
43
include/ck/host_utility/stream_utility.hpp
Normal file
43
include/ck/host_utility/stream_utility.hpp
Normal file
@@ -0,0 +1,43 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include "ck/stream_config.hpp"
|
||||
#include "ck/host_utility/hip_check_error.hpp"
|
||||
|
||||
static int getAvailableComputeUnitCount(const StreamConfig& stream_config)
|
||||
{
|
||||
constexpr int MAX_MASK_DWORDS = 64;
|
||||
|
||||
// assume at most 64*32 = 2048 CUs
|
||||
uint32_t cuMask[MAX_MASK_DWORDS];
|
||||
|
||||
for(int i = 0; i < MAX_MASK_DWORDS; i++)
|
||||
cuMask[i] = 0;
|
||||
|
||||
auto countSetBits = [](uint32_t dword) {
|
||||
int count = 0;
|
||||
|
||||
while(dword != 0)
|
||||
{
|
||||
if(dword & 0x1)
|
||||
count++;
|
||||
|
||||
dword = dword >> 1;
|
||||
};
|
||||
|
||||
return (count);
|
||||
};
|
||||
|
||||
hip_check_error(hipExtStreamGetCUMask(stream_config.stream_id_, MAX_MASK_DWORDS, &cuMask[0]));
|
||||
|
||||
int ret = 0;
|
||||
|
||||
for(int i = 0; i < MAX_MASK_DWORDS; i++)
|
||||
ret += countSetBits(cuMask[i]);
|
||||
|
||||
return (ret);
|
||||
};
|
||||
Reference in New Issue
Block a user