mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-15 18:42:06 +00:00
* Add maxpool f32 kernel and example
* Revise copyright
* Add device pool bwd device op
* Support f16 and bf16
* Add compute datatype for reference code.
Prevent error in bf16
* Fix type error
* Remove layout
* Fix bf16 error
* Add f16 and bf16 example
* Add more operations
* Implement IsSupportedArgument
* Add changelog
* Add comment
* Add comment
* Remove useless header
* Move initialize of workspace to the run
* Move set din zero to the device operator
* Save din_length_raw
* Remove useless header
* Calculate gridsize according to the number of CU
* Calculate gridSize according to the number of CU.
Remove useless header
* Add put example
* Remove useless header
* Fix CI fail
[ROCm/composable_kernel commit: 341ad95665]
44 lines
967 B
C++
44 lines
967 B
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#pragma once
|
|
|
|
#include <hip/hip_runtime.h>
|
|
|
|
#include "ck/stream_config.hpp"
|
|
#include "ck/host_utility/hip_check_error.hpp"
|
|
|
|
static inline int getAvailableComputeUnitCount(const StreamConfig& stream_config)
|
|
{
|
|
constexpr int MAX_MASK_DWORDS = 64;
|
|
|
|
// assume at most 64*32 = 2048 CUs
|
|
uint32_t cuMask[MAX_MASK_DWORDS];
|
|
|
|
for(int i = 0; i < MAX_MASK_DWORDS; i++)
|
|
cuMask[i] = 0;
|
|
|
|
auto countSetBits = [](uint32_t dword) {
|
|
int count = 0;
|
|
|
|
while(dword != 0)
|
|
{
|
|
if(dword & 0x1)
|
|
count++;
|
|
|
|
dword = dword >> 1;
|
|
};
|
|
|
|
return (count);
|
|
};
|
|
|
|
hip_check_error(hipExtStreamGetCUMask(stream_config.stream_id_, MAX_MASK_DWORDS, &cuMask[0]));
|
|
|
|
int ret = 0;
|
|
|
|
for(int i = 0; i < MAX_MASK_DWORDS; i++)
|
|
ret += countSetBits(cuMask[i]);
|
|
|
|
return (ret);
|
|
};
|