mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-17 19:40:04 +00:00
[CK_TILE][REGRESSION] Correct blockSize in Generic2dBlockShape (c254f… (#2837)
* [CK_TILE][REGRESSION] Correct blockSize in Generic2dBlockShape (5b17f135b7) WarpPerBlock_M * WarpPerBlock_N is not equal to ThreadPerBlock_M * ThreadPerBlock_N / warpSize, so we should calculate BlockSize from WarpPerBlock_M * WarpPerBlock_N. To be compatible with wave32, the function GetBlockSize is added to calculate the correct size on the host side. * fix blocksize for all kernels related to Generic2dBlockShape * remove constexpr for blocks [ROCm/composable_kernel commit:b7a806f244]
This commit is contained in:
@@ -82,7 +82,11 @@ struct Smoothquant
|
||||
return dim3(integer_divide_ceil(hargs.m, Block_M));
|
||||
}
|
||||
|
||||
CK_TILE_HOST static constexpr auto BlockSize() { return Problem::BlockShape::BlockSize; }
|
||||
CK_TILE_HOST static constexpr auto BlockSize()
|
||||
{
|
||||
return is_wave32() ? Problem::BlockShape::template GetBlockSize<true>()
|
||||
: Problem::BlockShape::template GetBlockSize<false>();
|
||||
}
|
||||
|
||||
// clang-format off
|
||||
template <typename T> struct t2s;
|
||||
|
||||
Reference in New Issue
Block a user