mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
Shuffle fix for gfx950 (#3491)
* solve compiler issue * solve the gfx950 mfma shuffle regression * refactor jenkinsfile to handle arch name better * [CK TILE] set divisor to count of thread along k dimension * fix the compiler error * solve degradation * Finish the multiplies fix * fix the scales * solve compilation error * solve the composes * solve the error of tile sweeper * fix the test and example * fix for gfx950 --------- Co-authored-by: Max Podkorytov <4273004+tenpercent@users.noreply.github.com> Co-authored-by: illsilin_amdeng <Illia.Silin@amd.com> Co-authored-by: Cong Ma <congma13@amd.com>
This commit is contained in:
@@ -65,7 +65,7 @@ inline bool is_gfx12_supported()
|
||||
return get_device_name() == "gfx1200" || get_device_name() == "gfx1201";
|
||||
}
|
||||
|
||||
inline bool is_load_tr_supported()
|
||||
inline bool is_gfx95_supported()
|
||||
{
|
||||
// Check if load transpose is supported.
|
||||
return get_device_name() == "gfx950";
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
#include "device_prop.hpp"
|
||||
#include <stdexcept>
|
||||
|
||||
namespace ck_tile {
|
||||
@@ -98,7 +99,7 @@ auto shuffle_b(const ck_tile::HostTensor<T>& t, const GemmConfig& gemmConfig)
|
||||
else
|
||||
{
|
||||
assert(is_wave32() == false);
|
||||
divisor = gemmConfig.N_Warp_Tile == 32 ? 2 : 4;
|
||||
divisor = get_warp_size() / gemmConfig.N_Warp_Tile;
|
||||
}
|
||||
ck_tile::HostTensor<T> t_view({n_ / gemmConfig.N_Warp_Tile,
|
||||
gemmConfig.N_Warp_Tile,
|
||||
@@ -167,7 +168,7 @@ auto shuffle_b_permuteN(const ck_tile::HostTensor<T>& t, const GemmConfig& gemmC
|
||||
else
|
||||
{
|
||||
assert(is_wave32() == false);
|
||||
divisor = gemmConfig.N_Warp_Tile == 32 ? 2 : 4;
|
||||
divisor = get_warp_size() / gemmConfig.N_Warp_Tile;
|
||||
}
|
||||
ck_tile::HostTensor<T> t_view({n_ / gemmConfig.N_Tile,
|
||||
gemmConfig.N_Warp,
|
||||
|
||||
Reference in New Issue
Block a user