Add the missing output tile distribution mapping

This commit is contained in:
joye
2025-04-24 21:55:03 -05:00
parent 6beb585dad
commit e2f3c95d24
3 changed files with 23 additions and 25 deletions

View File

@@ -294,7 +294,7 @@ endif()
option(USE_BITINT_EXTENSION_INT4 "Whether to enable clang's BitInt extension to provide int4 data type." OFF)
option(USE_OPT_GFX11 "Whether to enable LDS cumode and Wavefront32 mode for GFX11 silicons." OFF)
option(SAVE_TEMPS "Whether save intermeidate result." OFF)
if(USE_BITINT_EXTENSION_INT4)
add_compile_definitions(CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4)
add_compile_options(-Wno-bit-int-extension)
@@ -307,6 +307,11 @@ if(USE_OPT_GFX11)
message("CK compiled with USE_OPT_GFX11 set to ${USE_OPT_GFX11}")
endif()
# When the SAVE_TEMPS option is ON, add -save-temps=obj so the compiler keeps its
# intermediate files, and disable the gnu-line-marker warning.
# NOTE(review): the warning is presumably silenced because the saved preprocessed
# output contains GNU-style line markers — confirm against the toolchain in use.
if(SAVE_TEMPS)
add_compile_options(-save-temps=obj -Wno-gnu-line-marker)
endif()
## Threads
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)

View File

@@ -120,8 +120,8 @@ struct BlockTranspose
{
auto input_tile_window =
make_tile_window(input_window, Policy::template MakeInputDistribution<Problem>());
//auto output_tile_window =
// make_tile_window(output_window, Policy::template MakeLdsLoadTileDistribution<Problem>());
auto output_tile_window =
make_tile_window(output_window, Policy::template MakeOutputDistribution<Problem>());
DataType* p_lds_ptr = static_cast<DataType*>(p_smem);
constexpr auto in_lds_block_desc = Policy::template MakeLdsStoreBlockDescriptor<Problem>();
@@ -147,19 +147,11 @@ struct BlockTranspose
store_tile(copy_to_lds_window, x);
block_sync_lds();
//auto y = load_tile(load_from_lds_window);
//Debug<remove_cvref_t<decltype(y)>> cccc;
// auto load_from_lds_window =
// make_tile_window(output_lds_block,
// make_tuple(number<kSecondSizePerBlock>{},
// number<kLeadSizePerBlock>{}), {0, 0}, Policy::template
// MakeLdsLoadTileDistribution<Problem>());
auto y = load_tile_transpose(load_from_lds_window);
//Debug<remove_cvref_t<decltype(y)>> cccc;
store_tile(output_window, y);
// auto out_tensor = make_static_distributed_tensor<DataType>(Policy::template MakeOutputDistribution<Problem>());
store_tile(output_tile_window, out_tensor);
}
};

View File

@@ -79,17 +79,18 @@ struct TransposePolicy
// Builds the static tile distribution that BlockTranspose passes to make_tile_window()
// for its output window.
// NOTE(review): this span reads like a rendered diff whose +/- markers were stripped:
// two alternative bodies appear back to back (VecLoadSize is declared twice and the
// second body follows a return statement). Presumably only one of them exists in the
// real file — confirm against the repository before relying on either.
template <typename Problem>
CK_TILE_HOST_DEVICE static constexpr auto MakeOutputDistribution()
{
// Variant 1: derive the distribution from the Problem's block size and per-block extents.
constexpr index_t BlockSize = Problem::kBlockSize;
// The lead/second extents are taken from the opposite Problem members (swapped).
constexpr index_t LeadDimPerBlock = Problem::kSecondSizePerBlock;
constexpr index_t SecondDimPerBlock = Problem::kLeadSizePerBlock;
// Number of DataType elements that fit in 16 bytes.
constexpr index_t VecLoadSize = 16 / sizeof(typename Problem::DataType);
using TileEncodingPattern = TileDistributionEncodingPattern2D<BlockSize,
LeadDimPerBlock,
SecondDimPerBlock,
VecLoadSize,
TileAccessPattern>;
return TileEncodingPattern::Make2DStaticTileDistribution();
// Variant 2: hand-written encoding; the Problem-derived constants are commented out.
//constexpr index_t BlockSize = Problem::kBlockSize;
//constexpr index_t LeadDimPerBlock = Problem::kSecondSizePerBlock;
//constexpr index_t SecondDimPerBlock = Problem::kLeadSizePerBlock;
// Number of DataType elements that fit in 8 bytes.
constexpr index_t VecLoadSize = 8 / sizeof(typename Problem::DataType);
//TODO, fix the tile distribution
// The extents 16 and 4 below are literals, not derived from Problem, so this
// encoding does not follow the Problem's block shape (see the TODO above).
return make_static_tile_distribution(
tile_distribution_encoding<sequence<>,
tuple<sequence<16>, sequence<4, VecLoadSize>>,
tuple<sequence<2, 1>>,
tuple<sequence<0, 0>>,
sequence<2>,
sequence<1>>{});
}
template <typename Problem>