mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-19 20:40:07 +00:00
Add other layouts for FP8 block scaled gemm (#2665)
* Start adding other layouts for gemm_ab_scale
* Add some instances
* Create tensor descriptors for A/B scales depending on A/B layout
* Fix formatting
* Revert some comments
* Revert commented instances in CMakeLists.txt
* Add some more instances for col-row gemm
* Enable more row,row instances
* Use occupancy=1 for col,row layout to avoid spills
[ROCm/composable_kernel commit: 26d3300930]
This commit is contained in:
@@ -231,11 +231,22 @@ struct DeviceGemmMultiD_ABScale_Xdl_CShuffle_V3
|
||||
}
|
||||
};
|
||||
|
||||
constexpr index_t minimum_occupancy =
|
||||
(BlkGemmPipeSched == BlockGemmPipelineScheduler::Intrawave &&
|
||||
MPerBlock * NPerBlock / BlockSize > 64)
|
||||
? 1
|
||||
: 2;
|
||||
constexpr index_t minimum_occupancy = [&]() {
|
||||
if constexpr(is_same_v<tensor_layout::gemm::ColumnMajor, ALayout> &&
|
||||
is_same_v<tensor_layout::gemm::RowMajor, BLayout>)
|
||||
{
|
||||
// FIXME: many instances have many spills with occupancy > 1, a better solution
|
||||
// needed to get best performance
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
return (BlkGemmPipeSched == BlockGemmPipelineScheduler::Intrawave &&
|
||||
MPerBlock * NPerBlock / BlockSize > 64)
|
||||
? 1
|
||||
: 2;
|
||||
}
|
||||
}();
|
||||
|
||||
if(has_main_k_block_loop)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user