mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 13:41:24 +00:00
Add other layouts for FP8 block scaled gemm (#2665)
* Start adding other layouts for gemm_ab_scale
* Add some instances
* Create tensor descriptors for A/B scales depending on A/B layout
* Fix formatting
* Revert some comments
* Revert commented instances in CMakeLists.txt
* Add some more instances for col-row gemm
* Enable more row,row instances
* Use occupancy=1 for col,row layout to avoid spills
This commit is contained in:
@@ -231,11 +231,22 @@ struct DeviceGemmMultiD_ABScale_Xdl_CShuffle_V3
|
||||
}
|
||||
};
|
||||
|
||||
constexpr index_t minimum_occupancy =
|
||||
(BlkGemmPipeSched == BlockGemmPipelineScheduler::Intrawave &&
|
||||
MPerBlock * NPerBlock / BlockSize > 64)
|
||||
? 1
|
||||
: 2;
|
||||
constexpr index_t minimum_occupancy = [&]() {
|
||||
if constexpr(is_same_v<tensor_layout::gemm::ColumnMajor, ALayout> &&
|
||||
is_same_v<tensor_layout::gemm::RowMajor, BLayout>)
|
||||
{
|
||||
// FIXME: many instances have many spills with occupancy > 1, a better solution
|
||||
// needed to get best performance
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
return (BlkGemmPipeSched == BlockGemmPipelineScheduler::Intrawave &&
|
||||
MPerBlock * NPerBlock / BlockSize > 64)
|
||||
? 1
|
||||
: 2;
|
||||
}
|
||||
}();
|
||||
|
||||
if(has_main_k_block_loop)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user