Merge commit 'b0ee317d83b77741022997265d4125697e7f7804' into develop

This commit is contained in:
assistant-librarian[bot]
2025-09-12 20:11:58 +00:00
parent facbc883fa
commit 302aa809ea
65 changed files with 2301 additions and 232 deletions

View File

@@ -45,15 +45,15 @@ auto shuffle_b(const ck_tile::HostTensor<T>& t)
if(ck_tile::is_gfx12_supported())
{
// TODO: Please modify it once kABK0PerLane is changed in WmmaTraitsBase<gfx12>
constexpr int divisor = 2;
constexpr int kABK0PerLane = 2;
constexpr int kABK1PerLane = 8;
constexpr int kABK0PerLane = FlatmmConfig::K_Warp_Tile / divisor / kABK1PerLane;
ck_tile::HostTensor<T> t_view({n_ / FlatmmConfig::N_Warp_Tile,
FlatmmConfig::N_Warp_Tile,
k_ / FlatmmConfig::K_Warp_Tile,
divisor,
kABK0PerLane,
FlatmmConfig::K_Warp_Tile / divisor / kABK0PerLane});
divisor,
kABK1PerLane});
std::copy(t.begin(), t.end(), t_view.begin());
return ck_tile::reference_permute(t_view, {0, 2, 4, 1, 3, 5});
}