Persistent grouped gemm CompV4 Enablement & Polish (#2605)

* enable the persistent kernel for CompV4

* polish the example and clang format

* fix the non-persistent kernel error

---------

Co-authored-by: ThomasNing <thomasning@amd.com>
This commit is contained in:
Thomas Ning
2025-08-04 23:43:01 -07:00
committed by GitHub
parent 2a78da4708
commit cbfecf8d7a
6 changed files with 148 additions and 289 deletions

View File

@@ -18,12 +18,14 @@ struct BaseGemmPipelineAgBgCrCompV4
static constexpr index_t PrefillStages = 1;
static constexpr index_t GlobalBufferNum = 1;
CK_TILE_HOST static constexpr bool BlockHasHotloop(index_t num_loop)
static constexpr bool UsePersistentKernel = Problem::Traits::UsePersistentKernel;
CK_TILE_HOST_DEVICE static constexpr bool BlockHasHotloop(index_t num_loop)
{
return num_loop > PrefetchStages;
}
CK_TILE_HOST static constexpr TailNumber GetBlockLoopTailNum(index_t num_loop)
CK_TILE_HOST_DEVICE static constexpr TailNumber GetBlockLoopTailNum(index_t num_loop)
{
if(num_loop % PrefetchStages == 1)
{