[rocm-libraries] ROCm/rocm-libraries#8272 (commit 1c66ecb)

[CK] Padding on K for global load for grouped conv bwd data
 (#8272)

## Motivation

Fix incorrect results caused by missing padding during global load in the
grouped convolution backward data kernel. The padding is needed because the
global load performs no out-of-bounds (OOB) check.

## Technical Details

Add the padding needed for global loads, which do not use an OOB check.

## Test Plan

test_grouped_convnd_bwd_data*

## Test Result

Passed locally

## Submission Checklist

- [x] Look over the contributing guidelines at
https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
This commit is contained in:
Bartłomiej Kocot
2026-06-11 15:28:21 +00:00
committed by assistant-librarian[bot]
parent f0545b5c15
commit 0fdbf8a91d

View File

@@ -42,6 +42,7 @@ struct TransformConvBwdDataToGemm_v1
* in convolution backward data operations.
*/
static constexpr bool CustomTensorTransformBwdData = true;
static constexpr bool UsePaddingOnKDim = std::is_same_v<IndexType, long_index_t>;
template <index_t N>
using NumberType =
@@ -694,7 +695,7 @@ struct TransformConvBwdDataToGemm_v1
ck::tensor_operation::device::PadTensorDescriptor(
out_gemmak0_gemmmraw_gemmak1_grid_desc,
make_tuple(AK0 * batch_k_, GemmMPerBlock, AK1),
Sequence<false, DoPadGemmM, false>{});
Sequence<UsePaddingOnKDim, DoPadGemmM, UsePaddingOnKDim>{});
return out_gemmak0_gemmm_gemmak1_grid_desc;
}
@@ -993,7 +994,7 @@ struct TransformConvBwdDataToGemm_v1
ck::tensor_operation::device::PadTensorDescriptor(
wei_gemmbk0_gemmnraw_gemmbk1_grid_desc,
make_tuple(BK0 * batch_k_, GemmNPerBlock, BK1),
Sequence<false, DoPadGemmN, false>{});
Sequence<UsePaddingOnKDim, DoPadGemmN, UsePaddingOnKDim>{});
return wei_gemmbk0_gemmn_gemmbk1_grid_desc;
}
@@ -1573,7 +1574,7 @@ struct TransformConvBwdDataToGemm_v1
return ck::tensor_operation::device::PadTensorDescriptor(
out_gemmk0_gemmm_gemmk1_grid_desc,
make_tuple(Number<GemmKPerBlock / AK1>{}, Number<GemmMPerBlock>{}, Number<AK1>{}),
Sequence<true, DoPadGemmM, false>{});
Sequence<true, DoPadGemmM, UsePaddingOnKDim>{});
}
else
{
@@ -1645,7 +1646,7 @@ struct TransformConvBwdDataToGemm_v1
return ck::tensor_operation::device::PadTensorDescriptor(
out_gemmk0_gemmm_gemmk1_grid_desc,
make_tuple(Number<GemmKPerBlock / AK1>{}, Number<GemmMPerBlock>{}, Number<AK1>{}),
Sequence<true, DoPadGemmM, false>{});
Sequence<true, DoPadGemmM, UsePaddingOnKDim>{});
}
}
@@ -1671,7 +1672,7 @@ struct TransformConvBwdDataToGemm_v1
return ck::tensor_operation::device::PadTensorDescriptor(
wei_gemmk0_gemmn_gemmk1_grid_desc,
make_tuple(Number<GemmKPerBlock / BK1>{}, Number<GemmNPerBlock>{}, Number<BK1>{}),
Sequence<true, DoPadGemmN, false>{});
Sequence<true, DoPadGemmN, UsePaddingOnKDim>{});
}
else
{
@@ -1724,7 +1725,7 @@ struct TransformConvBwdDataToGemm_v1
return ck::tensor_operation::device::PadTensorDescriptor(
wei_gemmk0_gemmn_gemmk1_grid_desc,
make_tuple(Number<GemmKPerBlock / BK1>{}, Number<GemmNPerBlock>{}, Number<BK1>{}),
Sequence<true, DoPadGemmN, false>{});
Sequence<true, DoPadGemmN, UsePaddingOnKDim>{});
}
}