mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 05:31:24 +00:00
Extend XDL kernel to Support RDNA3/4 - Part 4 (#2724)
* Fix example * fix build error * update pk_i4 & moe test case * fix all instance build (examples) * fix batched_gemm_gemm (example) * disable example_gemm_bias_softmax_gemm_permute on gfx11 * remove unnecessary disable gfx11 * update tests * update tests2
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <iostream>
|
||||
#include <numeric>
|
||||
@@ -68,10 +68,10 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmMultipleD_Xdl
|
||||
64, // KPerBlock,
|
||||
16, // AK1,
|
||||
16, // BK1,
|
||||
32, // MPerXDL,
|
||||
32, // NPerXDL,
|
||||
4, // MXdlPerWave,
|
||||
2, // NXdlPerWave,
|
||||
16, // MPerXDL,
|
||||
16, // NPerXDL,
|
||||
8, // MXdlPerWave,
|
||||
4, // NXdlPerWave,
|
||||
S<4, 64, 1>, // ABlockTransferThreadClusterLengths_AK0_M_AK1,
|
||||
S<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder,
|
||||
S<1, 0, 2>, // ABlockTransferSrcAccessOrder,
|
||||
@@ -88,8 +88,8 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmMultipleD_Xdl
|
||||
1, // bool BBlockLdsExtraN,
|
||||
1, // index_t CShuffleMXdlPerWavePerShuffle,
|
||||
1, // index_t CShuffleNXdlPerWavePerShuffle,
|
||||
S<1, 64, 1, 4>, // typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock,
|
||||
16>; // index_t CShuffleBlockTransferScalarPerVector_NPerBlock>
|
||||
S<1, 32, 1, 8>, // typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock,
|
||||
4>; // index_t CShuffleBlockTransferScalarPerVector_NPerBlock>
|
||||
// clang-format on
|
||||
|
||||
using ReferenceGemmInstance = ck::tensor_operation::host::
|
||||
|
||||
Reference in New Issue
Block a user