mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-03-23 08:37:39 +00:00
optimize gemm2 atomic_add pattern
This commit is contained in:
@@ -693,13 +693,14 @@ struct MoeFlatmmKernel
|
||||
"Currently, the CShuffle EpiloguePipeline only supports the Row Major "
|
||||
"Output layout");
|
||||
|
||||
using TileEncodingPattern =
|
||||
TileDistributionEncodingPattern2D<kBlockSize,
|
||||
MPerIterationShuffle,
|
||||
NPerIterationShuffle,
|
||||
EpiloguePipeline::GetVectorSizeC(),
|
||||
tile_distribution_pattern::thread_raked,
|
||||
EpiProblem::kNumWaveGroups>;
|
||||
using TileEncodingPattern = TileDistributionEncodingPattern2D<
|
||||
kBlockSize,
|
||||
MPerIterationShuffle,
|
||||
NPerIterationShuffle,
|
||||
kind == MoeFlatmmKind::kFFN_gemm2 ? 2 : EpiloguePipeline::GetVectorSizeC(),
|
||||
tile_distribution_pattern::thread_raked,
|
||||
EpiProblem::kNumWaveGroups>;
|
||||
|
||||
constexpr auto dram_tile_distribution =
|
||||
TileEncodingPattern::Make2DStaticTileDistribution();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user