Support Wave32 in CK_TILE - Part 1 (#2594)

* Support wave32/wave64 in CK_TILE - Part 1

* remove blocksize in kernel launch

* fix build error

* fix clang format

* fix clang format 2

* fix clang format 3

* fix fmha build error

* fix fmha build 2

* fix fmha build 3

* fix build error 4

* address review comment

* update change log

* replace KernelBlockSize with kBlockSize

* fix CI fail

* fix clang format

* address review comment and rebase code.

* fix universal test fail

---------

Co-authored-by: Lin, Qun <Quentin.Lin+amdeng@amd.com>
Co-authored-by: Thomas Ning <Thomas.Ning@amd.com>
This commit is contained in:
linqunAMD
2025-08-19 01:08:31 +08:00
committed by GitHub
parent 26d3300930
commit 9fcc1ee9fd
113 changed files with 610 additions and 531 deletions

View File

@@ -82,18 +82,18 @@ class TestCkTileReduce : public ::testing::Test
throw std::runtime_error("Wrong! Arguments not supported!\n");
}
ck_tile::launch_kernel(ck_tile::stream_config{nullptr, false, 0},
ck_tile::make_kernel<kBlockSize, kBlockPerCu>(
Kernel{},
kGridSize,
kBlockSize,
0,
static_cast<XDataType*>(d_x_mem.GetDeviceBuffer()),
static_cast<YDataType*>(d_y_mem.GetDeviceBuffer()),
input_shape_tuple,
input_strides_tuple,
kept_dims,
reduce_dims));
ck_tile::launch_kernel(
ck_tile::stream_config{nullptr, false, 0},
ck_tile::make_kernel<kBlockPerCu>(Kernel{},
kGridSize,
kBlockSize,
0,
static_cast<XDataType*>(d_x_mem.GetDeviceBuffer()),
static_cast<YDataType*>(d_y_mem.GetDeviceBuffer()),
input_shape_tuple,
input_strides_tuple,
kept_dims,
reduce_dims));
// Get results back
d_y_mem.FromDevice(h_y.data());