/// @brief Reports whether a block with @p num_loop iterations runs the hot loop.
///
/// The hot loop is taken only when the iteration count reaches the sum of
/// PrefetchStages and the fixed hot-loop global-read count (2).
///
/// @param num_loop Iteration count to test.
/// @return true when num_loop >= PrefetchStages + 2, false otherwise.
CK_TILE_HOST_DEVICE static constexpr bool BlockHasHotloop(index_t num_loop)
{
    // NOTE(review): presumably the number of global reads one hot-loop
    // iteration issues; confirm against the pipeline's hot-loop body.
    constexpr index_t HotLoopGlobalReads = 2;

    // Smallest iteration count for which the hot loop is entered.
    constexpr index_t MinLoopsForHotLoop = HotLoopGlobalReads + PrefetchStages;

    if(num_loop < MinLoopsForHotLoop)
    {
        return false;
    }
    return true;
}