update instances

This commit is contained in:
aska-0096
2024-12-13 03:29:15 +00:00
parent 2e9901bfa6
commit e8ca3daf4e
11 changed files with 298 additions and 18 deletions

View File

@@ -617,8 +617,8 @@ struct GridwiseGemm_xdl_cshuffle_v3
// A matrix in LDS memory, dst of blockwise copy
if constexpr(ABlockLdsExtraM || BlkGemmPipelineVer == BlockGemmPipelineVersion::v4)
{
// bank conflict when writting the data into LDS, but don't worry, we have whole entire loop to hide it in v4.
// it may give you some benefit from less valu in compute address
// Bank conflicts occur when writing the data into LDS, but don't worry: in v4 we have the
// entire loop to hide them. It may also give some benefit from fewer VALU instructions
// when computing addresses.
return make_naive_tensor_descriptor(
make_tuple(AK0Number, Number<MPerBlock>{}, AK1Number),
make_tuple(Number<MPerBlock>{} * AK1Number, AK1Number, I1));
@@ -756,8 +756,8 @@ struct GridwiseGemm_xdl_cshuffle_v3
// B matrix in LDS memory, dst of blockwise copy
if constexpr(BBlockLdsExtraN || BlkGemmPipelineVer == BlockGemmPipelineVersion::v4)
{
// bank conflict when writting the data into LDS, but don't worry, we have whole entire loop to hide it in v4.
// it may give you some benefit from less valu in compute address
// Bank conflicts occur when writing the data into LDS, but don't worry: in v4 we have the
// entire loop to hide them. It may also give some benefit from fewer VALU instructions
// when computing addresses.
return make_naive_tensor_descriptor(
make_tuple(BK0Number, Number<NPerBlock>{}, BK1Number),
make_tuple(Number<NPerBlock + BBlockLdsExtraN>{} * BK1Number, BK1Number, I1));

View File

@@ -1718,7 +1718,7 @@ struct GridwiseGemmMultiD_xdl_cshuffle_v3
// make sure it's safe to write to LDS
block_sync_lds();
if constexpr(access_id < num_access - 1)
if constexpr(access_id < num_access - 1)
{
constexpr auto shuffle_m0 =
sfc_c_vgpr.GetIndexTupleOfNumber(access_id + Number<1>{})[Number<0>{}];
@@ -1789,7 +1789,7 @@ struct GridwiseGemmMultiD_xdl_cshuffle_v3
tie(e_grid_desc_mblock_mperblock_nblock_nperblock),
I0,
cde_lds_and_global_step);
EpilogueScheduler();
}
});
@@ -2236,7 +2236,7 @@ struct GridwiseGemmMultiD_xdl_cshuffle_v3
static_assert(num_access == sfc_cde_block.GetNumOfAccess(), "wrong!");
static_for<0, CShuffleMXdlPerWavePerShuffle, 1>{}([&](auto m0) {
static_for<0, CShuffleMXdlPerWavePerShuffle, 1>{}([&](auto m0) {
static_for<0, CShuffleNXdlPerWavePerShuffle, 1>{}([&](auto n0) {
static_for<0, KRepeat, 1>{}([&](auto k0) {
vector_type<ComputeTypeA, KPack> a_thread_vec;
@@ -2340,7 +2340,7 @@ struct GridwiseGemmMultiD_xdl_cshuffle_v3
tie(e_grid_desc_mblock_mperblock_nblock_nperblock),
I0,
cde_lds_and_global_step);
EpilogueScheduler();
}
});