mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 10:09:41 +00:00
Merge commit '1224bc0a82fbf47e1452bc4dbd63371471e57d4a' into develop
This commit is contained in:
@@ -11,6 +11,7 @@ Documentation for Composable Kernel available at [https://rocm.docs.amd.com/proj
|
||||
* Added support for microscaling (MX) FP8/FP4 mixed data types to Flatmm pipeline.
|
||||
* Added support for fp8 dynamic tensor-wise quantization of fp8 fmha fwd kernel.
|
||||
* Added FP8 KV cache support for FMHA batch prefill.
|
||||
* Added support for gfx1153 target.
|
||||
* Added FMHA batch prefill kernel support for several KV cache layouts, flexible page sizes, and different lookup table configurations.
|
||||
|
||||
### Changed
|
||||
|
||||
4
Jenkinsfile
vendored
4
Jenkinsfile
vendored
@@ -1469,8 +1469,8 @@ pipeline {
|
||||
environment{
|
||||
setup_args = "NO_CK_BUILD"
|
||||
execute_args = """ ../script/cmake-ck-dev.sh ../ gfx90a && \
|
||||
make -j64 test_grouped_convnd_fwd_large_cases test_grouped_convnd_bwd_data_xdl_large_cases test_grouped_convnd_fwd_bias_clamp_large_cases && \
|
||||
./bin/test_grouped_convnd_fwd_large_cases && ./bin/test_grouped_convnd_bwd_data_xdl_large_cases && ./bin/test_grouped_convnd_fwd_bias_clamp_large_cases"""
|
||||
make -j64 test_grouped_convnd_fwd_large_cases test_grouped_convnd_bwd_data_large_cases test_grouped_convnd_fwd_bias_clamp_large_cases && \
|
||||
./bin/test_grouped_convnd_fwd_large_cases && ./bin/test_grouped_convnd_bwd_data_large_cases && ./bin/test_grouped_convnd_fwd_bias_clamp_large_cases"""
|
||||
}
|
||||
steps{
|
||||
buildHipClangJobAndReboot(setup_args:setup_args, build_type: 'Release', execute_cmd: execute_args)
|
||||
|
||||
@@ -10,7 +10,8 @@
|
||||
namespace ck {
|
||||
|
||||
#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || \
|
||||
defined(__gfx1103__) || defined(__gfx11_generic__)
|
||||
defined(__gfx1103__) || defined(__gfx1150__) || defined(__gfx1151__) || \
|
||||
defined(__gfx1152__) || defined(__gfx1153__) || defined(__gfx11_generic__)
|
||||
#define __gfx11__
|
||||
#endif
|
||||
|
||||
|
||||
@@ -87,6 +87,7 @@ enum struct amdgcn_target_id
|
||||
GFX1150 = 0x1150,
|
||||
GFX1151 = 0x1151,
|
||||
GFX1152 = 0x1152,
|
||||
GFX1153 = 0x1153,
|
||||
GFX11_GENERIC = 0x11FF,
|
||||
GFX1200 = 0x1200,
|
||||
GFX1201 = 0x1201,
|
||||
@@ -282,6 +283,7 @@ constexpr auto get_compiler_target()
|
||||
MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX1150, GFX1150);
|
||||
MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX1151, GFX1151);
|
||||
MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX1152, GFX1152);
|
||||
MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX1153, GFX1153);
|
||||
MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX11_GENERIC, GFX11_GENERIC);
|
||||
MAP_COMPILER_STATE_TO_GFX12_TARGET(CK_TILE_ARCH_GFX1200, GFX1200);
|
||||
MAP_COMPILER_STATE_TO_GFX12_TARGET(CK_TILE_ARCH_GFX1201, GFX1201);
|
||||
@@ -348,6 +350,7 @@ CK_TILE_HOST auto hip_device_prop_gcn_arch_name_to_amdgcn_target_id(char const*
|
||||
MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_TARGET_ID("gfx1150", GFX1150);
|
||||
MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_TARGET_ID("gfx1151", GFX1151);
|
||||
MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_TARGET_ID("gfx1152", GFX1152);
|
||||
MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_TARGET_ID("gfx1153", GFX1153);
|
||||
MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_TARGET_ID("gfx11_generic", GFX11_GENERIC);
|
||||
MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_TARGET_ID("gfx1200", GFX1200);
|
||||
MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_TARGET_ID("gfx1201", GFX1201);
|
||||
@@ -603,6 +606,7 @@ CK_TILE_HOST_DEVICE constexpr auto get_compiler_target()
|
||||
MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX1150, GFX1150);
|
||||
MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX1151, GFX1151);
|
||||
MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX1152, GFX1152);
|
||||
MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX1153, GFX1153);
|
||||
MAP_COMPILER_STATE_TO_GFX11_TARGET(CK_TILE_ARCH_GFX11_GENERIC, GFX11_GENERIC);
|
||||
MAP_COMPILER_STATE_TO_GFX12_TARGET(CK_TILE_ARCH_GFX1200, GFX1200);
|
||||
MAP_COMPILER_STATE_TO_GFX12_TARGET(CK_TILE_ARCH_GFX1201, GFX1201);
|
||||
@@ -683,6 +687,7 @@ CK_TILE_HOST auto hip_device_prop_gcn_arch_name_to_amdgcn_target(char const* tes
|
||||
MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_GFX11_TARGET("gfx1150", GFX1150);
|
||||
MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_GFX11_TARGET("gfx1151", GFX1151);
|
||||
MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_GFX11_TARGET("gfx1152", GFX1152);
|
||||
MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_GFX11_TARGET("gfx1153", GFX1153);
|
||||
MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_GFX11_TARGET("gfx11_generic", GFX11_GENERIC);
|
||||
MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_GFX12_TARGET("gfx1200", GFX1200);
|
||||
MAP_HIP_DEVICE_PROP_GCN_ARCH_NAME_STRING_TO_GFX12_TARGET("gfx1201", GFX1201);
|
||||
|
||||
@@ -315,6 +315,7 @@ namespace ck_tile::core {
|
||||
* @var CK_TILE_ARCH_GFX1102 Indicates if the compiler target architecture is GFX1102.
|
||||
* @var CK_TILE_ARCH_GFX1151 Indicates if the compiler target architecture is GFX1151.
|
||||
* @var CK_TILE_ARCH_GFX1152 Indicates if the compiler target architecture is GFX1152.
|
||||
* @var CK_TILE_ARCH_GFX1153 Indicates if the compiler target architecture is GFX1153.
|
||||
* @var CK_TILE_ARCH_GFX11_GENERIC Indicates if the compiler target architecture is GFX11 generic.
|
||||
* @var CK_TILE_ARCH_GFX1200 Indicates if the compiler target architecture is GFX1200.
|
||||
* @var CK_TILE_ARCH_GFX1201 Indicates if the compiler target architecture is GFX1201.
|
||||
@@ -468,6 +469,12 @@ struct amdgcn_compiler_target_state
|
||||
static constexpr bool CK_TILE_ARCH_GFX1152 = false;
|
||||
#endif // __gfx1152__
|
||||
|
||||
#if defined(__gfx1153__)
|
||||
static constexpr bool CK_TILE_ARCH_GFX1153 = true;
|
||||
#else
|
||||
static constexpr bool CK_TILE_ARCH_GFX1153 = false;
|
||||
#endif // __gfx1153__
|
||||
|
||||
#if defined(__gfx11_generic__)
|
||||
static constexpr bool CK_TILE_ARCH_GFX11_GENERIC = true;
|
||||
#else
|
||||
@@ -538,6 +545,7 @@ CK_TILE_HOST_DEVICE static constexpr uint32_t count_values_of(T search, Ts... se
|
||||
amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1150, \
|
||||
amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1151, \
|
||||
amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1152, \
|
||||
amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1153, \
|
||||
amdgcn_compiler_target_state::CK_TILE_ARCH_GFX11_GENERIC, \
|
||||
amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1200, \
|
||||
amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1201, \
|
||||
|
||||
Reference in New Issue
Block a user