mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
Extend XDL kernel to Support RDNA3/4 - Part 4 (#2724)
* Fix example
* fix build error
* update pk_i4 & moe test case
* fix all instance build (examples)
* fix batched_gemm_gemm (example)
* disable example_gemm_bias_softmax_gemm_permute on gfx11
* remove unnecessary disable gfx11
* update tests
* update tests2
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#pragma once
|
||||
|
||||
@@ -48,8 +48,6 @@ struct ThreadwiseTensorSliceTransfer_v7r3
|
||||
{
|
||||
static constexpr auto I0 = Number<0>{};
|
||||
|
||||
static constexpr auto SrcScalarPerVector = SrcScalarPerVectors{}[I0];
|
||||
|
||||
static constexpr index_t nDim = SliceLengths::Size();
|
||||
|
||||
static constexpr index_t nSrc = SrcDescs::Size();
|
||||
@@ -67,6 +65,10 @@ struct ThreadwiseTensorSliceTransfer_v7r3
|
||||
Number<Descs::Size()>{});
|
||||
}
|
||||
|
||||
static constexpr auto SrcScalarPerVector =
|
||||
reduce_on_sequence(SrcScalarPerVectors{},
|
||||
math::minimize<index_t>{},
|
||||
Number<1>{}); // GetMinSrcScalarPerVector(); SrcScalarPerVectors{}[I0];
|
||||
using SrcCoords = decltype(MakeCoordinates(SrcDescs{}, StaticallyIndexedArray<Index, nSrc>{}));
|
||||
using DstCoords = decltype(MakeCoordinates(DstDescs{}, StaticallyIndexedArray<Index, nDst>{}));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user