Merge commit '9fcc1ee9fd9730efd865f530afde505f2556954d' into develop

This commit is contained in:
assistant-librarian[bot]
2025-08-18 17:12:50 +00:00
parent d436787ed0
commit 68b20e1d4f
113 changed files with 610 additions and 531 deletions

View File

@@ -76,17 +76,17 @@ class TestCkTileMemoryCopy : public ::testing::TestWithParam<std::tuple<int, int
constexpr ck_tile::index_t kBlockSize = 128;
constexpr ck_tile::index_t kBlockPerCu = 1;
auto ms = launch_kernel(ck_tile::stream_config{nullptr, true},
ck_tile::make_kernel<kBlockSize, kBlockPerCu>(
Kernel{},
kGridSize,
kBlockSize,
0,
static_cast<XDataType*>(x_buf.GetDeviceBuffer()),
static_cast<YDataType*>(y_buf.GetDeviceBuffer()),
m,
n,
warp_id));
auto ms = launch_kernel(
ck_tile::stream_config{nullptr, true},
ck_tile::make_kernel<kBlockPerCu>(Kernel{},
kGridSize,
kBlockSize,
0,
static_cast<XDataType*>(x_buf.GetDeviceBuffer()),
static_cast<YDataType*>(y_buf.GetDeviceBuffer()),
m,
n,
warp_id));
auto bytes = 2 * m * n * sizeof(DataType);
std::cout << "elapsed: " << ms << " (ms)" << std::endl;