Fix transform and instances for grouped conv bwd data (#848)

* Fix transform and instances for grouped conv bwd data

* Add instances for small K and small C

* Remove workaround after fix

* Fix interface tests

[ROCm/composable_kernel commit: 595d23be14]
This commit is contained in:
Bartłomiej Kocot
2023-08-22 18:25:41 +02:00
committed by GitHub
parent 6f9eeb3190
commit 80659b5bc1
8 changed files with 203 additions and 160 deletions

View File

@@ -129,6 +129,9 @@ struct ThreadwiseTensorSliceTransfer_v3r1
constexpr auto src_access_lengths = SliceLengths{} / src_scalar_per_access;
static_assert(SliceLengths::At(SrcVectorDim) % SrcScalarPerVector == 0,
"SliceLengths[SrcVectorDim] must be divisible by SrcScalarPerVector");
constexpr auto src_dim_access_order = SrcDimAccessOrder{};
constexpr auto ordered_src_access_lengths =