mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-20 04:49:54 +00:00
Support Wave32 in CK_TILE - Part 1 (#2594)
* Support wave32/wave64 in CK_TILE - Part 1
* remove the block size template argument from kernel launch
* fix build error
* fix clang format
* fix clang format 2
* fix clang format 3
* fix fmha build error
* fix fmha build 2
* fix fmha build 3
* fix build error 4
* address review comment
* update change log
* replace KernelBlockSize with kBlockSize
* fix CI failure
* fix clang format
* address review comment and rebase code.
* fix universal test failure
---------
Co-authored-by: Lin, Qun <Quentin.Lin+amdeng@amd.com>
Co-authored-by: Thomas Ning <Thomas.Ning@amd.com>
[ROCm/composable_kernel commit: 9fcc1ee9fd]
This commit is contained in:
@@ -118,19 +118,17 @@ class TestCkTileElementwise : public ::testing::Test
|
||||
"The kernel configuration is not supported for the given input size.");
|
||||
}
|
||||
|
||||
ck_tile::launch_kernel(
|
||||
s,
|
||||
ck_tile::make_kernel<TestElementWiseShape::kBlockSize, // MaxThreadPerBlock
|
||||
kBlockPerCu> // MinBlockPerCu
|
||||
(ew_kernel,
|
||||
grid,
|
||||
block,
|
||||
0, // actual shared memory
|
||||
lens,
|
||||
strides, // input strides
|
||||
strides, // output strides
|
||||
d_x_ptrs_tuple,
|
||||
p_y_device));
|
||||
ck_tile::launch_kernel(s,
|
||||
ck_tile::make_kernel<kBlockPerCu> // MinBlockPerCu
|
||||
(ew_kernel,
|
||||
grid,
|
||||
block,
|
||||
0, // actual shared memory
|
||||
lens,
|
||||
strides, // input strides
|
||||
strides, // output strides
|
||||
d_x_ptrs_tuple,
|
||||
p_y_device));
|
||||
|
||||
d_y_mem.FromDevice(h_y.data());
|
||||
|
||||
|
||||
Reference in New Issue
Block a user