From 4aeb73c616629f18e2fa98245eecd7f3acc1feba Mon Sep 17 00:00:00 2001 From: Kevin Abraham Date: Thu, 30 Oct 2025 21:16:02 +0000 Subject: [PATCH] fixed synchronization issue in block gemm pipeline v1 that caused b_scale to fail --- .../gpu/block/blockwise_gemm_pipeline_wmmaops_v1.hpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_wmmaops_v1.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_wmmaops_v1.hpp index f25648efa6..004a867898 100644 --- a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_wmmaops_v1.hpp +++ b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_wmmaops_v1.hpp @@ -289,6 +289,10 @@ struct BlockwiseGemmWmmaops_pipeline_v1((i + 2) % num_loop_per_scale == 0); + if constexpr(ck::is_same::value == false) + { + block_sync_lds(); + } a_blockwise_copy.RunWrite(a_block_desc, a_block_buf); b_blockwise_copy.RunWrite(b_block_desc, b_block_buf); @@ -631,6 +635,10 @@ struct BlockwiseGemmWmmaops_pipeline_v1((i + 2) % num_loop_per_scale == 0); + if constexpr(ck::is_same::value == false) + { + block_sync_lds(); + } a_blockwise_copy.RunWrite(a_block_desc, a_block_buf); b_blockwise_copy.RunWrite(b_block_desc, b_block_buf);