ck: add CK_USE_GFX950 macro (#3636)

This commit is contained in:
yinglu
2026-01-27 03:38:45 +08:00
committed by GitHub
parent b8751e505d
commit 8942a19d5e
4 changed files with 7 additions and 9 deletions

View File

@@ -259,6 +259,11 @@ if ((SUPPORTED_GPU_TARGETS MATCHES "gfx94" OR SUPPORTED_GPU_TARGETS MATCHES "gfx
add_definitions(-DCK_USE_GFX94)
set(CK_USE_GFX94 "ON")
endif()
# gfx950 opt-in: when the requested GPU target list matches "gfx950" and XDL
# has not been force-disabled, record the fact in the CK_USE_GFX950 CMake
# variable and pass -DCK_USE_GFX950 to the compiler for this directory scope.
# NOTE(review): the status text mentions only FP8 gemms; confirm it still
# describes everything the CK_USE_GFX950 macro gates.
if(SUPPORTED_GPU_TARGETS MATCHES "gfx950" AND NOT FORCE_DISABLE_XDL)
    message(STATUS "Enabling XDL FP8 gemms on gfx950")
    set(CK_USE_GFX950 ON)
    add_definitions(-DCK_USE_GFX950)
endif()
# new macro CK_TILE_USE_WMMA in order to separately compile examples for MFMA/WMMA
set(CK_TILE_USE_WMMA 0)

View File

@@ -55,9 +55,6 @@
#ifndef CK_ENABLE_FP32
#define CK_ENABLE_FP32 "ON"
#endif
#ifndef CK_ENABLE_TF32
#define CK_ENABLE_TF32 "ON"
#endif
#ifndef CK_ENABLE_FP64
#define CK_ENABLE_FP64 "ON"
#endif
@@ -88,10 +85,6 @@
#cmakedefine CK_ENABLE_FP32 @CK_ENABLE_FP32@
#endif
#ifndef CK_ENABLE_TF32
#cmakedefine CK_ENABLE_TF32 @CK_ENABLE_TF32@
#endif
#ifndef CK_ENABLE_FP64
#cmakedefine CK_ENABLE_FP64 @CK_ENABLE_FP64@
#endif

View File

@@ -376,7 +376,7 @@ using device_grouped_conv_bwd_data_xdl_f32_optimized_loads_instances =
// clang-format on
>;
#if defined(__gfx950__)
#if defined(CK_USE_GFX950)
constexpr auto _k_per_block = 32;
#else
constexpr auto _k_per_block = 16;

View File

@@ -147,7 +147,7 @@ using device_grouped_conv_fwd_xdl_merged_groups_f32_instances = std::tuple<
// clang-format on
>;
#if defined(__gfx950__)
#if defined(CK_USE_GFX950)
constexpr auto _k_per_block = 32;
#else
constexpr auto _k_per_block = 16;