From 0fdbf8a91d10df00a0635c3f2bd3959de0a126de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bart=C5=82omiej=20Kocot?= <38502616+bartekxk@users.noreply.github.com> Date: Thu, 11 Jun 2026 15:28:21 +0000 Subject: [PATCH] [rocm-libraries] ROCm/rocm-libraries#8272 (commit 1c66ecb) [CK] Padding on K for global load for grouped conv bwd data (#8272) ## Motivation Fix incorrect results caused by a lack of padding during the global load in the grouped convolution backward data kernel. It is needed since there is no OOB check for the global load. ## Technical Details Add the padding needed for the global load, which does not use an OOB check. ## Test Plan test_grouped_convnd_bwd_data* ## Test Result Passed locally ## Submission Checklist - [x] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests. --- .../transform_conv_bwd_data_to_gemm_v1.hpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp b/include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp index 52d5bebcdf..50a13e297f 100644 --- a/include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp +++ b/include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp @@ -42,6 +42,7 @@ struct TransformConvBwdDataToGemm_v1 * in convolution backward data operations. 
*/ static constexpr bool CustomTensorTransformBwdData = true; + static constexpr bool UsePaddingOnKDim = std::is_same_v; template using NumberType = @@ -694,7 +695,7 @@ struct TransformConvBwdDataToGemm_v1 ck::tensor_operation::device::PadTensorDescriptor( out_gemmak0_gemmmraw_gemmak1_grid_desc, make_tuple(AK0 * batch_k_, GemmMPerBlock, AK1), - Sequence{}); + Sequence{}); return out_gemmak0_gemmm_gemmak1_grid_desc; } @@ -993,7 +994,7 @@ struct TransformConvBwdDataToGemm_v1 ck::tensor_operation::device::PadTensorDescriptor( wei_gemmbk0_gemmnraw_gemmbk1_grid_desc, make_tuple(BK0 * batch_k_, GemmNPerBlock, BK1), - Sequence{}); + Sequence{}); return wei_gemmbk0_gemmn_gemmbk1_grid_desc; } @@ -1573,7 +1574,7 @@ struct TransformConvBwdDataToGemm_v1 return ck::tensor_operation::device::PadTensorDescriptor( out_gemmk0_gemmm_gemmk1_grid_desc, make_tuple(Number{}, Number{}, Number{}), - Sequence{}); + Sequence{}); } else { @@ -1645,7 +1646,7 @@ struct TransformConvBwdDataToGemm_v1 return ck::tensor_operation::device::PadTensorDescriptor( out_gemmk0_gemmm_gemmk1_grid_desc, make_tuple(Number{}, Number{}, Number{}), - Sequence{}); + Sequence{}); } } @@ -1671,7 +1672,7 @@ struct TransformConvBwdDataToGemm_v1 return ck::tensor_operation::device::PadTensorDescriptor( wei_gemmk0_gemmn_gemmk1_grid_desc, make_tuple(Number{}, Number{}, Number{}), - Sequence{}); + Sequence{}); } else { @@ -1724,7 +1725,7 @@ struct TransformConvBwdDataToGemm_v1 return ck::tensor_operation::device::PadTensorDescriptor( wei_gemmk0_gemmn_gemmk1_grid_desc, make_tuple(Number{}, Number{}, Number{}), - Sequence{}); + Sequence{}); } }