diff --git a/include/ck_tile/ops/transpose/block_transpose.hpp b/include/ck_tile/ops/transpose/block_transpose.hpp index 5b8d531e01..5d91e2bdee 100644 --- a/include/ck_tile/ops/transpose/block_transpose.hpp +++ b/include/ck_tile/ops/transpose/block_transpose.hpp @@ -8,6 +8,9 @@ namespace ck_tile { +template +struct Debug; + template struct TransposeTraits { @@ -118,7 +121,7 @@ struct BlockTranspose auto input_tile_window = make_tile_window(input_window, Policy::template MakeInputDistribution()); auto output_tile_window = - make_tile_window(out_window, Policy::template MakeOutputDistribution()); + make_tile_window(out_window, Policy::template MakeLdsLoadTileDistribution()); DataType* p_lds_ptr = static_cast(p_smem); constexpr auto in_lds_block_desc = Policy::template MakeLdsStoreBlockDescriptor(); @@ -145,13 +148,15 @@ struct BlockTranspose block_sync_lds(); auto y = load_tile(load_from_lds_window); + + //Debug> cccc; // auto load_from_lds_window = // make_tile_window(output_lds_block, // make_tuple(number{}, // number{}), {0, 0}, Policy::template // MakeLdsLoadTileDistribution()); - // auto y = load_tile_transpose(load_from_lds_window); + //auto y = load_tile_transpose(load_from_lds_window); store_tile(output_tile_window, y); } }; diff --git a/include/ck_tile/ops/transpose/transpose_policy.hpp b/include/ck_tile/ops/transpose/transpose_policy.hpp index 33f481be30..4fdf1acb80 100644 --- a/include/ck_tile/ops/transpose/transpose_policy.hpp +++ b/include/ck_tile/ops/transpose/transpose_policy.hpp @@ -157,6 +157,7 @@ struct TransposePolicy kSecond / QuartTransposeTraits::ksecondDim; using xdllevel_dstr_encoding = typename QuartTransposeTraits< typename Problem::DataType>::template TileDistribution; + constexpr index_t kLeadIterPerWarp = Problem::kLeadXdlNumPerWarp; constexpr index_t kSecondIterPerWarp = Problem::kSecondXdlNumPerWarp; constexpr index_t kLeadNumWarps = Problem::kLeadNumWarps; @@ -165,12 +166,14 @@ struct TransposePolicy tile_distribution_encoding, tuple, sequence>, - tuple, sequence<2>>, - tuple, sequence<1>>, + tuple>, + tuple>, sequence<1, 2>, sequence<0, 0>>{}; - return detail::make_embed_tile_distribution_encoding(block_outer_dst_encoding, + constexpr auto blk_distr_encode = detail::make_embed_tile_distribution_encoding(block_outer_dst_encoding, xdllevel_dstr_encoding{}); + constexpr auto block_dstr = make_static_tile_distribution(blk_distr_encode); + return block_dstr; } };