mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-29 19:28:33 +00:00
miss output tile distribution mapping
This commit is contained in:
@@ -294,7 +294,7 @@ endif()
|
||||
|
||||
option(USE_BITINT_EXTENSION_INT4 "Whether to enable clang's BitInt extension to provide int4 data type." OFF)
|
||||
option(USE_OPT_GFX11 "Whether to enable LDS cumode and Wavefront32 mode for GFX11 silicons." OFF)
|
||||
|
||||
option(SAVE_TEMPS "Whether to save intermediate compilation results." OFF)
|
||||
if(USE_BITINT_EXTENSION_INT4)
|
||||
add_compile_definitions(CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4)
|
||||
add_compile_options(-Wno-bit-int-extension)
|
||||
@@ -307,6 +307,11 @@ if(USE_OPT_GFX11)
|
||||
message("CK compiled with USE_OPT_GFX11 set to ${USE_OPT_GFX11}")
|
||||
endif()
|
||||
|
||||
# When SAVE_TEMPS is enabled, pass -save-temps=obj to every compile command and silence the gnu-line-marker warning.
if(SAVE_TEMPS)
|
||||
add_compile_options(-save-temps=obj -Wno-gnu-line-marker)
|
||||
endif()
|
||||
|
||||
|
||||
## Threads
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
@@ -120,8 +120,8 @@ struct BlockTranspose
|
||||
{
|
||||
auto input_tile_window =
|
||||
make_tile_window(input_window, Policy::template MakeInputDistribution<Problem>());
|
||||
//auto output_tile_window =
|
||||
// make_tile_window(output_window, Policy::template MakeLdsLoadTileDistribution<Problem>());
|
||||
auto output_tile_window =
|
||||
make_tile_window(output_window, Policy::template MakeOutputDistribution<Problem>());
|
||||
|
||||
DataType* p_lds_ptr = static_cast<DataType*>(p_smem);
|
||||
constexpr auto in_lds_block_desc = Policy::template MakeLdsStoreBlockDescriptor<Problem>();
|
||||
@@ -147,19 +147,11 @@ struct BlockTranspose
|
||||
store_tile(copy_to_lds_window, x);
|
||||
block_sync_lds();
|
||||
|
||||
//auto y = load_tile(load_from_lds_window);
|
||||
|
||||
//Debug<remove_cvref_t<decltype(y)>> cccc;
|
||||
// auto load_from_lds_window =
|
||||
// make_tile_window(output_lds_block,
|
||||
// make_tuple(number<kSecondSizePerBlock>{},
|
||||
// number<kLeadSizePerBlock>{}), {0, 0}, Policy::template
|
||||
// MakeLdsLoadTileDistribution<Problem>());
|
||||
|
||||
auto y = load_tile_transpose(load_from_lds_window);
|
||||
|
||||
//Debug<remove_cvref_t<decltype(y)>> cccc;
|
||||
store_tile(output_window, y);
|
||||
// auto out_tensor = make_static_distributed_tensor<DataType>(Policy::template MakeOutputDistribution<Problem>());
|
||||
|
||||
store_tile(output_tile_window, out_tensor);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -79,17 +79,18 @@ struct TransposePolicy
|
||||
template <typename Problem>
|
||||
CK_TILE_HOST_DEVICE static constexpr auto MakeOutputDistribution()
|
||||
{
|
||||
constexpr index_t BlockSize = Problem::kBlockSize;
|
||||
constexpr index_t LeadDimPerBlock = Problem::kSecondSizePerBlock;
|
||||
constexpr index_t SecondDimPerBlock = Problem::kLeadSizePerBlock;
|
||||
constexpr index_t VecLoadSize = 16 / sizeof(typename Problem::DataType);
|
||||
|
||||
using TileEncodingPattern = TileDistributionEncodingPattern2D<BlockSize,
|
||||
LeadDimPerBlock,
|
||||
SecondDimPerBlock,
|
||||
VecLoadSize,
|
||||
TileAccessPattern>;
|
||||
return TileEncodingPattern::Make2DStaticTileDistribution();
|
||||
//constexpr index_t BlockSize = Problem::kBlockSize;
|
||||
//constexpr index_t LeadDimPerBlock = Problem::kSecondSizePerBlock;
|
||||
//constexpr index_t SecondDimPerBlock = Problem::kLeadSizePerBlock;
|
||||
constexpr index_t VecLoadSize = 8 / sizeof(typename Problem::DataType);
|
||||
//TODO, fix the tile distribution
|
||||
return make_static_tile_distribution(
|
||||
tile_distribution_encoding<sequence<>,
|
||||
tuple<sequence<16>, sequence<4, VecLoadSize>>,
|
||||
tuple<sequence<2, 1>>,
|
||||
tuple<sequence<0, 0>>,
|
||||
sequence<2>,
|
||||
sequence<1>>{});
|
||||
}
|
||||
|
||||
template <typename Problem>
|
||||
|
||||
Reference in New Issue
Block a user