Add other layouts for FP8 block scaled gemm (#2665)

* Start adding other layouts for gemm_ab_scale

* Add some instances

* Create tensor descriptors for A/B scales depending on A/B layout

* Fix formatting

* Revert some comments

* Revert commented instances in CMakeLists.txt

* Add some more instances for col-row gemm

* Enable more row,row instances

* Use occupancy=1 for the col,row layout to avoid register spills

[ROCm/composable_kernel commit: 26d3300930]
This commit is contained in:
Sami Remes
2025-08-18 11:46:10 +03:00
committed by GitHub
parent a4d70b6e13
commit 13bfcba04c
15 changed files with 758 additions and 13 deletions

View File

@@ -173,6 +173,40 @@ int profile_gemm_ab_scale(int argc, char* argv[])
Col{},
Row{});
}
else if(data_type == GemmDataType::F8_F8_BF16 && layout == GemmMatrixLayout::MK_KN_MN &&
scale_block_tile == ScaleBlockTile::Tile_1_128_128)
{
return profile(F8{},
F32{},
F8{},
F32{},
F8{},
F32{},
BF16{},
ck::Number<1>{},
ck::Number<128>{},
ck::Number<128>{},
Row{},
Row{},
Row{});
}
else if(data_type == GemmDataType::F8_F8_BF16 && layout == GemmMatrixLayout::KM_KN_MN &&
scale_block_tile == ScaleBlockTile::Tile_1_128_128)
{
return profile(F8{},
F32{},
F8{},
F32{},
F8{},
F32{},
BF16{},
ck::Number<1>{},
ck::Number<128>{},
ck::Number<128>{},
Col{},
Row{},
Row{});
}
else
{
std::cout << "this data_type & layout is not implemented" << std::endl;