mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-17 03:19:48 +00:00
Tune & add conflict-free LDS gemm kernels (#159)
* retune & add conflict-free bf16/fp16 c-shuffle gemm instances
fix the incorrect K1 value in some fp16/bf16 kernel instances
* make gemm cshuffle's timing behavior consistent with all other functions
* clang-format
* retune & add conflict-free fp32 c-shuffle gemm instances
* retune & add conflict-free int8 c-shuffle gemm instances
* update the underlying gridwise gemm of all c-shuffle gemm kernels
* fix typo
[ROCm/composable_kernel commit: 7db48f9008]
This commit is contained in:
@@ -171,22 +171,7 @@ int main(int argc, char* argv[])
 "not support this GEMM problem");
 }
 
-// warm up
-invoker.Run(argument);
-
-// timing
-KernelTimer timer;
-
-timer.Start();
-
-for(int i = 0; i < nrepeat; ++i)
-{
-invoker.Run(argument);
-}
-
-timer.End();
-
-float ave_time = timer.GetElapsedTime() / nrepeat;
+float ave_time = invoker.Run(argument, nrepeat);
 
 std::size_t flop = std::size_t(2) * M * N * K;
 std::size_t num_btype =
Reference in New Issue
Block a user