[CK_TILE] Tile loop persistent gemm kernel (#2191)

* Implement tile loop persistent gemm kernel

* Enable timing

* Add tests for persistent gemm

* Fix formatting

* Fix gemm_basic

* Rename True/False to Persistent/NonPersistent

* Use only one set of layouts for persistent tests

* Fix gemm example persistent template parameter

* Fix formatting
This commit is contained in:
Sami Remes
2025-06-04 11:46:28 +03:00
committed by GitHub
parent 52b4860a30
commit ffb52783d0
10 changed files with 232 additions and 18 deletions

View File

@@ -213,7 +213,8 @@ auto create_args(int argc, char* argv[])
.insert("repeat", "100", "number of iterations to benchmark the kernel")
.insert("timer", "gpu", "gpu:gpu timer, cpu:cpu timer")
.insert("split_k", "1", "splitK value")
.insert("init", "0", "0:random, 1:linear, 2:constant(1)");
.insert("init", "0", "0:random, 1:linear, 2:constant(1)")
.insert("persistent", "0", "0:non-persistent, 1:persistent");
bool result = arg_parser.parse(argc, argv);
return std::make_tuple(result, arg_parser);
@@ -226,5 +227,6 @@ template <typename ADataType,
typename CDataType,
typename ALayout,
typename BLayout,
typename CLayout>
typename CLayout,
bool Persistent = false>
float gemm_calc(const ck_tile::GemmHostArgs& args, const ck_tile::stream_config& s);