// NOTE(review): this is a unified diff whose line breaks were collapsed onto a
// single line, so the per-line ' ', '-' and '+' markers now appear inline. The
// reading below is reconstructed from the hunk header (-204,7 +204,8, a net gain
// of one line, consistent with one line removed and two added) and should be
// confirmed against the original patch.
//
// The hunk edits store(Fragment const &frag, TensorCoord const & tile_offset) in
// include/cutlass/transform/threadblock/regular_tile_iterator_pitch_linear.h.
// Only the second argument passed to store_with_pointer_offset() changes:
//   removed: tile_offset.contiguous() * Shape::kContiguous
//              + tile_offset.strided() * Shape::kStrided * stride_
//   added:   tile_offset.contiguous() * Shape::kContiguous / ThreadMap::kElementsPerAccess
//              + tile_offset.strided() * Shape::kStrided * stride_
// That is, the contiguous term is now divided by ThreadMap::kElementsPerAccess,
// while the strided term is left exactly as it was.
//
// Since '*' and '/' associate left to right, the division applies to the whole
// product tile_offset.contiguous() * Shape::kContiguous.
//
// Presumably the offset handed to store_with_pointer_offset() is counted in
// units of kElementsPerAccess-wide accesses rather than single elements, and
// stride_ is already expressed in those units -- TODO confirm against the
// definitions of store_with_pointer_offset() and stride_, which are not visible
// in this hunk.
diff --git a/include/cutlass/transform/threadblock/regular_tile_iterator_pitch_linear.h b/include/cutlass/transform/threadblock/regular_tile_iterator_pitch_linear.h index 1485b1533..4a08ba9e3 100644 --- a/include/cutlass/transform/threadblock/regular_tile_iterator_pitch_linear.h +++ b/include/cutlass/transform/threadblock/regular_tile_iterator_pitch_linear.h @@ -204,7 +204,8 @@ public: void store(Fragment const &frag, TensorCoord const & tile_offset) { store_with_pointer_offset( frag, - tile_offset.contiguous() * Shape::kContiguous + tile_offset.strided() * Shape::kStrided * stride_ + tile_offset.contiguous() * Shape::kContiguous / ThreadMap::kElementsPerAccess + + tile_offset.strided() * Shape::kStrided * stride_ ); }