mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 17:26:00 +00:00
* replace buffer_atomic with global_atomic * fixed global_atomic_add * added bf16 atomic_add * format * clang-format-12 * clean * clean * add guards * Update gtest.cmake * enabled splitk_gemm_multi_d * format * add ckProfiler * format * fixed naming * format * clean * clean * add guards * fix clang format * format * add kbatch printout * clean * Add rocm6.2 related gemm optimization * Limit bf16 atomic usage * remove redundant RCR gemm_universal instance * Add RRR fp8 gemm universal instance * Bug fix * Add GPU_TARGET guard to FP8/BF8 target * bug fix * update cmake * remove all fp8/bf8 example if arch not support * Enable fp8 RRR support in ckProfiler * limit greedy-reverse flag to gemm_universal in ckProfiler --------- Co-authored-by: Jing Zhang <jizhan@fb.com> Co-authored-by: Jing Zhang <jizhan@meta.com> Co-authored-by: zjing14 <zhangjing14@gmail.com> Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com> Co-authored-by: illsilin <Illia.Silin@amd.com>
99 lines
2.9 KiB
C++
99 lines
2.9 KiB
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#pragma once
|
|
|
|
#include <string>
|
|
#include <map>
|
|
#include <hip/hip_runtime.h>
|
|
|
|
namespace ck {
|
|
|
|
inline std::string get_device_name()
|
|
{
|
|
hipDeviceProp_t props{};
|
|
int device;
|
|
auto status = hipGetDevice(&device);
|
|
if(status != hipSuccess)
|
|
{
|
|
return std::string();
|
|
}
|
|
|
|
status = hipGetDeviceProperties(&props, device);
|
|
if(status != hipSuccess)
|
|
{
|
|
return std::string();
|
|
}
|
|
const std::string raw_name(props.gcnArchName);
|
|
|
|
// https://github.com/ROCm/MIOpen/blob/8498875aef84878e04c1eabefdf6571514891086/src/target_properties.cpp#L40
|
|
static std::map<std::string, std::string> device_name_map = {
|
|
{"Ellesmere", "gfx803"},
|
|
{"Baffin", "gfx803"},
|
|
{"RacerX", "gfx803"},
|
|
{"Polaris10", "gfx803"},
|
|
{"Polaris11", "gfx803"},
|
|
{"Tonga", "gfx803"},
|
|
{"Fiji", "gfx803"},
|
|
{"gfx800", "gfx803"},
|
|
{"gfx802", "gfx803"},
|
|
{"gfx804", "gfx803"},
|
|
{"Vega10", "gfx900"},
|
|
{"gfx901", "gfx900"},
|
|
{"10.3.0 Sienna_Cichlid 18", "gfx1030"},
|
|
};
|
|
|
|
const auto name = raw_name.substr(0, raw_name.find(':')); // str.substr(0, npos) returns str.
|
|
|
|
auto match = device_name_map.find(name);
|
|
if(match != device_name_map.end())
|
|
return match->second;
|
|
return name;
|
|
}
|
|
|
|
inline bool is_xdl_supported()
|
|
{
|
|
return ck::get_device_name() == "gfx908" || ck::get_device_name() == "gfx90a" ||
|
|
ck::get_device_name() == "gfx940" || ck::get_device_name() == "gfx941" ||
|
|
ck::get_device_name() == "gfx942";
|
|
}
|
|
|
|
inline bool is_lds_direct_load_supported()
|
|
{
|
|
// Check if direct loads from global memory to LDS are supported.
|
|
return ck::get_device_name() == "gfx90a" || ck::get_device_name() == "gfx940" ||
|
|
ck::get_device_name() == "gfx941" || ck::get_device_name() == "gfx942";
|
|
}
|
|
|
|
inline bool is_bf16_atomic_supported()
|
|
{
|
|
return ck::get_device_name() == "gfx940" || ck::get_device_name() == "gfx941" ||
|
|
ck::get_device_name() == "gfx942";
|
|
}
|
|
|
|
inline bool is_gfx101_supported()
|
|
{
|
|
return ck::get_device_name() == "gfx1010" || ck::get_device_name() == "gfx1011" ||
|
|
ck::get_device_name() == "gfx1012";
|
|
}
|
|
|
|
inline bool is_gfx103_supported()
|
|
{
|
|
return ck::get_device_name() == "gfx1030" || ck::get_device_name() == "gfx1031" ||
|
|
ck::get_device_name() == "gfx1032" || ck::get_device_name() == "gfx1034" ||
|
|
ck::get_device_name() == "gfx1035" || ck::get_device_name() == "gfx1036";
|
|
}
|
|
|
|
inline bool is_gfx11_supported()
|
|
{
|
|
return ck::get_device_name() == "gfx1100" || ck::get_device_name() == "gfx1101" ||
|
|
ck::get_device_name() == "gfx1102" || ck::get_device_name() == "gfx1103";
|
|
}
|
|
|
|
inline bool is_gfx12_supported()
|
|
{
|
|
return ck::get_device_name() == "gfx1200" || ck::get_device_name() == "gfx1201";
|
|
}
|
|
|
|
} // namespace ck
|