Files
composable_kernel/include/ck/utility/amd_arch.hpp
Illia Silin 717f2efef7 [rocm-libraries] ROCm/rocm-libraries#6978 (commit e58096d)
[CK] add composable kernel support on gfx1250 (#6978)

## Motivation

Add composable kernel support on gfx1250.

## Technical Details

<!-- Explain the changes along with any relevant GitHub links. -->

## Test Plan

<!-- Explain any relevant testing done to verify this PR. -->

## Test Result

<!-- Briefly summarize test outcomes. -->

## Submission Checklist

- [ ] Look over the contributing guidelines at
https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.

---------

Co-authored-by: Qun Lin <qlin@amd.com>
Co-authored-by: jialuo12_amdeng <jia.luo@amd.com>
Co-authored-by: Andriy Roshchenko <andriy.roshchenko@amd.com>
Co-authored-by: hsivasun_amdeng <haresh.sivasuntharampillai@amd.com>
2026-05-15 06:46:51 -07:00

126 lines
1.9 KiB
C++

// SPDX-License-Identifier: MIT
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
#pragma once
#include "ck/ck.hpp"
namespace ck {
// Architecture tags
//
// Empty marker types: they hold no data and exist only so that helpers can be
// selected per architecture at compile time (the helpers in this header take a
// tag object as their sole argument). get_device_arch() picks one of them from
// the target macros; the macro each tag is tied to there is noted alongside.
struct gfx9_t {};        // returned when __gfx9__ is defined (and __gfx950__ is not)
struct gfx950_t {};      // returned when __gfx950__ is defined
struct gfx103_t {};      // returned when __gfx10__ is defined
struct gfx11_t {};       // returned when __gfx11__ is defined
struct gfx12_t {};       // not returned by get_device_arch() in this header
struct gfx120_t {};      // returned when __gfx12__ is defined (and __gfx125__ is not)
struct gfx125_t {};      // returned when __gfx125__ is defined
struct gfx_invalid_t {}; // fallback when none of the macros above is defined
static constexpr auto get_device_arch()
{
#if defined(__gfx950__)
return gfx950_t{};
#elif defined(__gfx9__)
return gfx9_t{};
#elif defined(__gfx10__)
return gfx103_t{};
#elif defined(__gfx11__)
return gfx11_t{};
#elif defined(__gfx125__)
return gfx125_t{};
#elif defined(__gfx12__)
return gfx120_t{};
#else
return gfx_invalid_t{};
#endif
}
// LDS size for an architecture tag.
//
// The primary template covers every tag without a dedicated specialization and
// yields 64 * 1024; gfx950_t and gfx125_t are specialized below with larger
// values. The argument is only used for its type.
// NOTE(review): the unit is presumably bytes (64 / 160 / 320 KiB) - confirm.
template <typename DeviceArch>
static constexpr index_t get_lds_size(DeviceArch)
{
    constexpr auto kib = 1024;
    return 64 * kib;
}

// gfx125_t: 320 * 1024.
template <>
constexpr index_t get_lds_size<gfx125_t>(gfx125_t)
{
    constexpr auto kib = 1024;
    return 320 * kib;
}

// gfx950_t: 160 * 1024.
template <>
constexpr index_t get_lds_size<gfx950_t>(gfx950_t)
{
    constexpr auto kib = 1024;
    return 160 * kib;
}
// Number of LDS banks for an architecture tag.
//
// Tags without a specialization get 32; gfx950_t and gfx125_t are specialized
// to 64. The argument is only used for its type.
template <typename DeviceArch>
static constexpr index_t get_n_lds_banks(DeviceArch)
{
    constexpr auto default_bank_count = 32;
    return default_bank_count;
}

// gfx125_t: 64 banks.
template <>
constexpr index_t get_n_lds_banks<gfx125_t>(gfx125_t)
{
    constexpr auto bank_count = 64;
    return bank_count;
}

// gfx950_t: 64 banks.
template <>
constexpr index_t get_n_lds_banks<gfx950_t>(gfx950_t)
{
    constexpr auto bank_count = 64;
    return bank_count;
}
// Maximum VGPR count for an architecture tag.
//
// Tags without a specialization get 256. gfx9_t and gfx950_t are specialized
// to 512, gfx125_t to 1024. The argument is only used for its type.
// NOTE(review): the scope of the limit (per thread / per wave) is not visible
// in this header - confirm against the callers.
template <typename DeviceArch>
static constexpr index_t get_max_vgpr_count(DeviceArch)
{
    constexpr auto default_max_vgprs = 256;
    return default_max_vgprs;
}

// gfx9_t: 512.
template <>
constexpr index_t get_max_vgpr_count<gfx9_t>(gfx9_t)
{
    constexpr auto max_vgprs = 512;
    return max_vgprs;
}

// gfx950_t: 512 (same value as gfx9_t).
template <>
constexpr index_t get_max_vgpr_count<gfx950_t>(gfx950_t)
{
    constexpr auto max_vgprs = 512;
    return max_vgprs;
}

// gfx125_t: 1024.
template <>
constexpr index_t get_max_vgpr_count<gfx125_t>(gfx125_t)
{
    constexpr auto max_vgprs = 1024;
    return max_vgprs;
}
// VGPR count per SIMD for an architecture tag.
//
// Tags without a specialization get 1024 (this includes gfx125_t, which has no
// specialization here); gfx9_t and gfx950_t are specialized to 512. The
// argument is only used for its type.
template <typename DeviceArch>
static constexpr index_t get_vgpr_count_per_simd(DeviceArch)
{
    constexpr auto default_vgprs_per_simd = 1024;
    return default_vgprs_per_simd;
}

// gfx950_t: 512.
template <>
constexpr index_t get_vgpr_count_per_simd<gfx950_t>(gfx950_t)
{
    constexpr auto vgprs_per_simd = 512;
    return vgprs_per_simd;
}

// gfx9_t: 512.
template <>
constexpr index_t get_vgpr_count_per_simd<gfx9_t>(gfx9_t)
{
    constexpr auto vgprs_per_simd = 512;
    return vgprs_per_simd;
}
} // namespace ck