From 32e805b85347e4daa1ba34ce39542bb8a5847a9b Mon Sep 17 00:00:00 2001 From: Estevan Vedovelli Date: Mon, 5 Jan 2026 13:03:30 -0500 Subject: [PATCH] Add support to gfx1153 and fix gfx115X WMMA config (#3496) * Support for gfx115X * Changes for gfx115X * Add gfx1153 * Update changelog --------- Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com> [ROCm/composable_kernel commit: 1224bc0a82fbf47e1452bc4dbd63371471e57d4a] --- CHANGELOG.md | 1 + include/ck/utility/amd_wmma.hpp | 3 ++- include/ck_tile/core/arch/arch.hpp | 5 +++++ include/ck_tile/core/config.hpp | 8 ++++++++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce8e5197a8..b149a74df3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Documentation for Composable Kernel available at [https://rocm.docs.amd.com/proj * Added support for microscaling (MX) FP8/FP4 mixed data types to Flatmm pipeline. * Added support for fp8 dynamic tensor-wise quantization of fp8 fmha fwd kernel. * Added FP8 KV cache support for FMHA batch prefill. +* Added support for gfx1153 target. * Added FMHA batch prefill kernel support for several KV cache layouts, flexible page sizes, and different lookup table configurations. ### Changed diff --git a/include/ck/utility/amd_wmma.hpp b/include/ck/utility/amd_wmma.hpp index 35389bda37..057687985d 100644 --- a/include/ck/utility/amd_wmma.hpp +++ b/include/ck/utility/amd_wmma.hpp @@ -10,7 +10,8 @@ namespace ck { #if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || \ - defined(__gfx1103__) || defined(__gfx11_generic__) + defined(__gfx1103__) || defined(__gfx1150__) || defined(__gfx1151__) || \ + defined(__gfx1152__) || defined(__gfx1153__) || defined(__gfx11_generic__) #define __gfx11__ #endif diff --git a/include/ck_tile/core/arch/arch.hpp b/include/ck_tile/core/arch/arch.hpp index a162195390..c5c1a6e2c6 100644 --- a/include/ck_tile/core/arch/arch.hpp +++ b/include/ck_tile/core/arch/arch.hpp @@ -87,6 +87,7 @@ enum struct amdgcn_target_id GFX1150 = 0x1150, GFX1151 = 0x1151, GFX1152 = 0x1152, + GFX1153 = 0x1153, GFX11_GENERIC = 0x11FF, GFX1200 = 0x1200, GFX1201 = 0x1201, @@ -282,6 +283,7 @@ constexpr auto get_compiler_target() MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX1150, GFX1150); MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX1151, GFX1151); MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX1152, GFX1152); + MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX1153, GFX1153); MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX11_GENERIC, GFX11_GENERIC); MAP_COMPILER_STATE_TO_GFX12_TARGET(CK_TILE_ARCH_GFX1200, GFX1200); MAP_COMPILER_STATE_TO_GFX12_TARGET(CK_TILE_ARCH_GFX1201, GFX1201); @@ -348,6 +350,7 @@ CK_TILE_HOST auto hip_device_prop_gcn_arch_name_to_amdgcn_target_id(char const* MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_TARGET_ID("gfx1150", GFX1150); MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_TARGET_ID("gfx1151", GFX1151); MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_TARGET_ID("gfx1152", GFX1152); + MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_TARGET_ID("gfx1153", GFX1153); MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_TARGET_ID("gfx11_generic", GFX11_GENERIC); MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_TARGET_ID("gfx1200", GFX1200); MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_TARGET_ID("gfx1201", GFX1201); @@ -603,6 +606,7 @@ CK_TILE_HOST_DEVICE constexpr auto get_compiler_target() MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX1150, GFX1150); MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX1151, GFX1151); MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX1152, GFX1152); + MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX1153, GFX1153); MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX11_GENERIC, GFX11_GENERIC); MAP_COMPILER_STATE_TO_GFX12_TARGET(CK_TILE_ARCH_GFX1200, GFX1200); MAP_COMPILER_STATE_TO_GFX12_TARGET(CK_TILE_ARCH_GFX1201, GFX1201); @@ -683,6 +687,7 @@ CK_TILE_HOST auto hip_device_prop_gcn_arch_name_to_amdgcn_target(char const* tes MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_GFX11_TARGET("gfx1150", GFX1150); MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_GFX11_TARGET("gfx1151", GFX1151); MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_GFX11_TARGET("gfx1152", GFX1152); + MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_GFX11_TARGET("gfx1153", GFX1153); MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_GFX11_TARGET("gfx11_generic", GFX11_GENERIC); MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_GFX12_TARGET("gfx1200", GFX1200); MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_GFX12_TARGET("gfx1201", GFX1201); diff --git a/include/ck_tile/core/config.hpp b/include/ck_tile/core/config.hpp index 7830749efb..fed9209bad 100644 --- a/include/ck_tile/core/config.hpp +++ b/include/ck_tile/core/config.hpp @@ -315,6 +315,7 @@ namespace ck_tile::core { * @var CK_TILE_ARCH_GFX1102 Indicates if the compiler target architecture is GFX1102. * @var CK_TILE_ARCH_GFX1151 Indicates if the compiler target architecture is GFX1151. * @var CK_TILE_ARCH_GFX1152 Indicates if the compiler target architecture is GFX1152. + * @var CK_TILE_ARCH_GFX1153 Indicates if the compiler target architecture is GFX1153. * @var CK_TILE_ARCH_GFX11_GENERIC Indicates if the compiler target architecture is GFX11 generic. * @var CK_TILE_ARCH_GFX1200 Indicates if the compiler target architecture is GFX1200. * @var CK_TILE_ARCH_GFX1201 Indicates if the compiler target architecture is GFX1201. @@ -468,6 +469,12 @@ struct amdgcn_compiler_target_state static constexpr bool CK_TILE_ARCH_GFX1152 = false; #endif // __gfx1152__ +#if defined(__gfx1153__) + static constexpr bool CK_TILE_ARCH_GFX1153 = true; +#else + static constexpr bool CK_TILE_ARCH_GFX1153 = false; +#endif // __gfx1153__ + #if defined(__gfx11_generic__) static constexpr bool CK_TILE_ARCH_GFX11_GENERIC = true; #else @@ -538,6 +545,7 @@ CK_TILE_HOST_DEVICE static constexpr uint32_t count_values_of(T search, Ts... se amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1150, \ amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1151, \ amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1152, \ + amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1153, \ amdgcn_compiler_target_state::CK_TILE_ARCH_GFX11_GENERIC, \ amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1200, \ amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1201, \