diff --git a/include/ck_tile/core/tensor/tile_window.hpp b/include/ck_tile/core/tensor/tile_window.hpp index e1096556f4..2c9b227124 100644 --- a/include/ck_tile/core/tensor/tile_window.hpp +++ b/include/ck_tile/core/tensor/tile_window.hpp @@ -649,12 +649,19 @@ struct tile_window_with_static_distribution return 0; }(); - this->get_bottom_tensor_view().template async_get_vectorized_elements( - smem, - bottom_tensor_thread_coord.get_offset() + offset, - dram_ys_offset - imm_valid, - number{}, - bool_constant{}); + if constexpr(!static_move_ys) + this->get_bottom_tensor_view().template async_get_vectorized_elements( + smem, + bottom_tensor_thread_coord, + offset + dram_ys_offset, + bool_constant{}); + else + this->get_bottom_tensor_view().template async_get_vectorized_elements( + smem, + bottom_tensor_thread_coord.get_offset() + offset, + dram_ys_offset - imm_valid, + number{}, + bool_constant{}); // Move thread coordinate if not last access if constexpr(iCoordAccess != (NumAccessPerCoord - 1))