diff --git a/include/ck_tile/core/numeric/integral_constant.hpp b/include/ck_tile/core/numeric/integral_constant.hpp index 879f9565b8..ea7a67abcc 100644 --- a/include/ck_tile/core/numeric/integral_constant.hpp +++ b/include/ck_tile/core/numeric/integral_constant.hpp @@ -33,7 +33,7 @@ template using number = constant; template -using long_number = integral_constant; +using long_number = constant; template using bool_constant = constant; diff --git a/include/ck_tile/core/tensor/shuffle_tile.hpp b/include/ck_tile/core/tensor/shuffle_tile.hpp index a756679bd9..502b7560a4 100644 --- a/include/ck_tile/core/tensor/shuffle_tile.hpp +++ b/include/ck_tile/core/tensor/shuffle_tile.hpp @@ -10,6 +10,7 @@ #include "ck_tile/core/algorithm/coordinate_transform.hpp" #include "ck_tile/core/algorithm/space_filling_curve.hpp" #include "ck_tile/core/container/container_helper.hpp" +#include "ck_tile/core/container/thread_buffer.hpp" #include "ck_tile/core/container/statically_indexed_array.hpp" #include "ck_tile/core/numeric/math.hpp" #include "ck_tile/core/utility/type_traits.hpp" @@ -98,8 +99,8 @@ CK_TILE_DEVICE void shuffle_tile_impl_in_thread(OutTensor& out_tensor, const InT static_assert(num_access > 0, "wrong! num_access should be larger than 0"); // in/out vectors to be transposed - statically_indexed_array in_vectors; - statically_indexed_array out_vectors; + thread_buffer in_vectors; + thread_buffer out_vectors; // loop over SFC and do transpose static_for<0, num_access, 1>{}([&](auto iAccess) { diff --git a/include/ck_tile/core/utility/transpose_vectors.hpp b/include/ck_tile/core/utility/transpose_vectors.hpp index acd5dd7b1d..7eb7af1073 100644 --- a/include/ck_tile/core/utility/transpose_vectors.hpp +++ b/include/ck_tile/core/utility/transpose_vectors.hpp @@ -5,6 +5,7 @@ #include "ck_tile/core/config.hpp" #include "ck_tile/core/container/array.hpp" +#include "ck_tile/core/container/thread_buffer.hpp" #include "ck_tile/core/utility/bit_cast.hpp" #include "ck_tile/core/utility/functional.hpp" @@ -25,7 +26,7 @@ struct transpose_vectors using VX = array; using VY = array; - CK_TILE_DEVICE void operator()(const array& vx_tuple, array& vy_tuple) + CK_TILE_DEVICE void operator()(const thread_buffer& vx_tuple, thread_buffer& vy_tuple) { constexpr auto I1 = number<1>{}; constexpr auto I2 = number<2>{};