mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
Shuffle fix for gfx950 (#3491)
* solve compiler issue
* solve the gfx950 mfma shuffle regression
* refactor jenkinsfile to handle arch name better
* [CK TILE] set divisor to count of thread along k dimension
* fix the compiler error
* solve degradation
* Finish the multiplies fix
* fix the scales
* solve compilation error
* solve the composes
* solve the error of tile sweeper
* fix the test and example
* fix for gfx950
---------
Co-authored-by: Max Podkorytov <4273004+tenpercent@users.noreply.github.com>
Co-authored-by: illsilin_amdeng <Illia.Silin@amd.com>
Co-authored-by: Cong Ma <congma13@amd.com>
[ROCm/composable_kernel commit: 00c46785a8]
This commit is contained in:
@@ -42,7 +42,8 @@ struct AtomicKernelShape
|
||||
static constexpr index_t Repeat_M = Block_M * RepeatInWarp_M / (WarpPerBlock_M * Warp_M);
|
||||
static constexpr index_t Repeat_N = Block_N * RepeatInWarp_N / (WarpPerBlock_N * Warp_N);
|
||||
|
||||
- static constexpr index_t WaveNum = reduce_on_sequence(BlockWaves{}, multiplies{}, number<1>{});
|
||||
+ static constexpr index_t WaveNum =
|
||||
+ reduce_on_sequence(BlockWaves{}, multiplies<>{}, number<1>{});
|
||||
|
||||
static constexpr index_t BlockSize = get_warp_size() * WaveNum;
|
||||
};
|
||||
|
||||
@@ -42,7 +42,8 @@ struct TileCopyShape
|
||||
static constexpr index_t Repeat_M = Block_M / (WarpPerBlock_M * Warp_M);
|
||||
static constexpr index_t Repeat_N = Block_N / (WarpPerBlock_N * Warp_N);
|
||||
|
||||
- static constexpr index_t WaveNum = reduce_on_sequence(BlockWaves{}, multiplies{}, number<1>{});
|
||||
+ static constexpr index_t WaveNum =
|
||||
+ reduce_on_sequence(BlockWaves{}, multiplies<>{}, number<1>{});
|
||||
|
||||
static constexpr index_t BlockSize = get_warp_size() * WaveNum;
|
||||
static constexpr index_t WaveGroupSize = WaveNum / WaveGroups;
|
||||
|
||||
Reference in New Issue
Block a user