From abc0a0b77f99598c8ebee290b3fdbcd3ec0f12c3 Mon Sep 17 00:00:00 2001
From: kabrahamAMD
Date: Fri, 31 Oct 2025 15:19:01 +0100
Subject: [PATCH] Kabraham/fix block gemm v1 b scale (#3129)

* fixed synchronization issue in block gemm pipeline v1 that caused b_scale to fail

* run clang-format

---------

Co-authored-by: Kevin Abraham

[ROCm/composable_kernel commit: a7c52e8afa710291c6d3e4326a9059ac84bed50d]
---
 .../gpu/block/blockwise_gemm_pipeline_wmmaops_v1.hpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_wmmaops_v1.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_wmmaops_v1.hpp
index 87ccc7c5e0..5d7c570428 100644
--- a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_wmmaops_v1.hpp
+++ b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_wmmaops_v1.hpp
@@ -300,6 +300,10 @@ struct BlockwiseGemmWmmaops_pipeline_v1((i + 2) % num_loop_per_scale == 0);
+    if constexpr(ck::is_same::value == false)
+    {
+        block_sync_lds();
+    }
     a_blockwise_copy.RunWrite(a_block_desc, a_block_buf);
     b_blockwise_copy.RunWrite(b_block_desc, b_block_buf);
@@ -699,6 +703,10 @@ struct BlockwiseGemmWmmaops_pipeline_v1((i + 2) % num_loop_per_scale == 0);
+    if constexpr(ck::is_same::value == false)
+    {
+        block_sync_lds();
+    }
     a_blockwise_copy.RunWrite(a_block_desc, a_block_buf);
     b_blockwise_copy.RunWrite(b_block_desc, b_block_buf);