Patch summary (free-form preamble placed before the first "diff --git" header):
  * CMakeLists.txt: stores the resolved SUPPORTED_GPU_TARGETS list in the CMake
    cache as a debugging aid. FORCE is used because set(... CACHE ...) without
    it leaves an already-existing cache entry untouched, so the cached list
    would otherwise stay at whatever the first configure produced.
  * xdlops_gemm.hpp: two GetMfma specializations in MfmaSelector now pick the
    returned MfmaInstr by target macro: __gfx12__ / __gfx11__ return the
    wmma_unsupport_16x16_gfx12 / _gfx11 entries, __gfx942__ keeps the previous
    xf32 entries, and every other target returns the f32 entries.
  NOTE(review): the "template <> constexpr auto GetMfma()" lines carry no
  template argument list here; presumably it was lost when this patch was
  extracted. Confirm against the header before applying.
  NOTE(review): line breaks, blank context lines and indentation were
  reconstructed from the hunk line counts; verify them against the target files.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 88b8f05200..f4d3a83c34 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -220,6 +220,9 @@ rocm_check_target_ids(SUPPORTED_GPU_TARGETS
 
 message(STATUS "Building CK for the following targets: ${SUPPORTED_GPU_TARGETS}")
 
+# Cache SUPPORTED_GPU_TARGETS for debug; FORCE refreshes the entry on every configure
+set(SUPPORTED_GPU_TARGETS "${SUPPORTED_GPU_TARGETS}" CACHE STRING "List of supported GPU targets" FORCE)
+
 if (SUPPORTED_GPU_TARGETS MATCHES "gfx9|gfx11|gfx12")
     message(STATUS "Enabling XDL instances")
     add_definitions(-DCK_USE_XDL)
diff --git a/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp b/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
index a86aa2f8ef..ce2d9299f9 100644
--- a/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
+++ b/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
@@ -1277,13 +1277,29 @@ struct MfmaSelector
     template <>
     constexpr auto GetMfma()
     {
+#if defined(__gfx12__)
+        return MfmaInstr::wmma_unsupport_16x16_gfx12;
+#elif defined(__gfx11__)
+        return MfmaInstr::wmma_unsupport_16x16_gfx11;
+#elif defined(__gfx942__)
         return MfmaInstr::mfma_f32_32x32x4xf32;
+#else
+        return MfmaInstr::mfma_f32_32x32x2f32;
+#endif
     }
 
     template <>
     constexpr auto GetMfma()
     {
+#if defined(__gfx12__)
+        return MfmaInstr::wmma_unsupport_16x16_gfx12;
+#elif defined(__gfx11__)
+        return MfmaInstr::wmma_unsupport_16x16_gfx11;
+#elif defined(__gfx942__)
         return MfmaInstr::mfma_f32_16x16x8xf32;
+#else
+        return MfmaInstr::mfma_f32_16x16x4f32;
+#endif
     }
 
     template <>