mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 14:59:17 +00:00
Shuffle fix for gfx950 (#3491)
* solve compiler issue * solve the gfx950 mfma shuffle regression * refactor jenkinsfile to handle arch name better * [CK TILE] set divisor to count of thread along k dimension * fix the compiler error * solve degradation * Finish the multiplies fix * fix the scales * solve compilation error * solve the composes * solve the error of tile sweeper * fix the test and example * fix for gfx950 --------- Co-authored-by: Max Podkorytov <4273004+tenpercent@users.noreply.github.com> Co-authored-by: illsilin_amdeng <Illia.Silin@amd.com> Co-authored-by: Cong Ma <congma13@amd.com>
This commit is contained in:
@@ -1351,8 +1351,8 @@ fwd_result fmha_fwd_run(mode_enum mode,
|
||||
|
||||
auto oacc_element_func = [&]() {
|
||||
if constexpr(std::is_same_v<ODataType, ck_tile::fp8_t> && supports_qscale)
|
||||
return ck_tile::composes(ck_tile::saturates<ck_tile::fp8_t>{},
|
||||
ck_tile::scales{scale_o_host});
|
||||
return ck_tile::make_composes(ck_tile::saturates<ck_tile::fp8_t>{},
|
||||
ck_tile::scales{scale_o_host});
|
||||
else if constexpr(supports_qscale)
|
||||
return ck_tile::scales{scale_o_host};
|
||||
else
|
||||
|
||||
Reference in New Issue
Block a user