mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 18:17:44 +00:00
Merge commit '9fcc1ee9fd9730efd865f530afde505f2556954d' into develop
This commit is contained in:
@@ -167,17 +167,17 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
}
|
||||
|
||||
// 4. Run the kernel
|
||||
float ave_time = launch_kernel(ck_tile::stream_config{nullptr, true, 0, warmup, repeat},
|
||||
ck_tile::make_kernel<kBlockSize, kBlockPerCu>(
|
||||
Kernel{},
|
||||
kGridSize,
|
||||
kBlockSize,
|
||||
0,
|
||||
input_size,
|
||||
ck_tile::make_tuple(N, 1), // Input Stride
|
||||
ck_tile::make_tuple(N, 1), // Output Stride
|
||||
input_tensors,
|
||||
static_cast<YDataType*>(y_buf.GetDeviceBuffer())));
|
||||
float ave_time = launch_kernel(
|
||||
ck_tile::stream_config{nullptr, true, 0, warmup, repeat},
|
||||
ck_tile::make_kernel<kBlockPerCu>(Kernel{},
|
||||
kGridSize,
|
||||
kBlockSize,
|
||||
0,
|
||||
input_size,
|
||||
ck_tile::make_tuple(N, 1), // Input Stride
|
||||
ck_tile::make_tuple(N, 1), // Output Stride
|
||||
input_tensors,
|
||||
static_cast<YDataType*>(y_buf.GetDeviceBuffer())));
|
||||
|
||||
std::cout << "Average time: " << ave_time << " ms" << std::endl;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user