mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-19 20:40:07 +00:00
Update tuning parameter & compilation options of DeviceGemmXdl<> instance (layout=TT) (#819)
* Enable pipeline v2 opt for layout=TT instance
* Use better thread mapping for reading A tile
* Conditionally enable pipeline v2 opt
* Allow enabling only fp16 gemm instances in profiler
* Fix formatting error
* Fix compilation error if we enable fp32 in profiler
[ROCm/composable_kernel commit: f7cc8c3b03]
This commit is contained in:
@@ -121,7 +121,10 @@ int profile_gemm(int argc, char* argv[])
|
||||
return pass ? 0 : 1;
|
||||
};
|
||||
|
||||
-if(data_type == GemmDataType::F32_F32_F32 && layout == GemmMatrixLayout::MK_KN_MN)
|
||||
+if(false)
|
||||
+;
|
||||
+#ifdef __fp32__
|
||||
+else if(data_type == GemmDataType::F32_F32_F32 && layout == GemmMatrixLayout::MK_KN_MN)
|
||||
{
|
||||
return profile(Row{}, Row{}, Row{}, F32{}, F32{}, F32{}, F32{});
|
||||
}
|
||||
@@ -137,6 +140,8 @@ int profile_gemm(int argc, char* argv[])
|
||||
{
|
||||
return profile(Col{}, Col{}, Row{}, F32{}, F32{}, F32{}, F32{});
|
||||
}
|
||||
+#endif
|
||||
+#ifdef __fp16__
|
||||
else if(data_type == GemmDataType::F16_F16_F16 && layout == GemmMatrixLayout::MK_KN_MN)
|
||||
{
|
||||
return profile(Row{}, Row{}, Row{}, F16{}, F16{}, F32{}, F16{});
|
||||
@@ -153,6 +158,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
{
|
||||
return profile(Col{}, Col{}, Row{}, F16{}, F16{}, F32{}, F16{});
|
||||
}
|
||||
#endif
|
||||
#ifdef __bf16__
|
||||
else if(data_type == GemmDataType::BF16_BF16_BF16 && layout == GemmMatrixLayout::MK_KN_MN)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user