Fix transform and instances for grouped conv bwd data (#848)

* Fix transform and instances for grouped conv bwd data

* Add instances for small K and small C

* Remove workaround after fix

* Fix interface tests
This commit is contained in:
Bartłomiej Kocot
2023-08-22 18:25:41 +02:00
committed by GitHub
parent eac50708d9
commit 595d23be14
8 changed files with 203 additions and 160 deletions

View File

@@ -129,6 +129,9 @@ struct ThreadwiseTensorSliceTransfer_v3r1
constexpr auto src_access_lengths = SliceLengths{} / src_scalar_per_access;
static_assert(SliceLengths::At(SrcVectorDim) % SrcScalarPerVector == 0,
"SliceLengths[SrcVectorDim] must be divisible by SrcScalarPerVector");
constexpr auto src_dim_access_order = SrcDimAccessOrder{};
constexpr auto ordered_src_access_lengths =