Update tuning parameter & compilation options of DeviceGemmXdl<> instance (layout=TT) (#819)

* Enable pipeline v2 opt for layout=TT instance

* Use better thread mapping for reading A tile

* Conditionally enable pipeline v2 opt

* Allow enabling only fp16 GEMM instances in the profiler

* Fix formatting error

* Fix compilation error when fp32 is enabled in the profiler

[ROCm/composable_kernel commit: f7cc8c3b03]
This commit is contained in:
Po Yen Chen
2023-08-02 23:32:22 +08:00
committed by GitHub
parent 9996b8c375
commit 60371ab663
3 changed files with 42 additions and 33 deletions

View File

@@ -121,7 +121,10 @@ int profile_gemm(int argc, char* argv[])
return pass ? 0 : 1;
};
if(data_type == GemmDataType::F32_F32_F32 && layout == GemmMatrixLayout::MK_KN_MN)
if(false)
;
#ifdef __fp32__
else if(data_type == GemmDataType::F32_F32_F32 && layout == GemmMatrixLayout::MK_KN_MN)
{
return profile(Row{}, Row{}, Row{}, F32{}, F32{}, F32{}, F32{});
}
@@ -137,6 +140,8 @@ int profile_gemm(int argc, char* argv[])
{
return profile(Col{}, Col{}, Row{}, F32{}, F32{}, F32{}, F32{});
}
#endif
#ifdef __fp16__
else if(data_type == GemmDataType::F16_F16_F16 && layout == GemmMatrixLayout::MK_KN_MN)
{
return profile(Row{}, Row{}, Row{}, F16{}, F16{}, F32{}, F16{});
@@ -153,6 +158,7 @@ int profile_gemm(int argc, char* argv[])
{
return profile(Col{}, Col{}, Row{}, F16{}, F16{}, F32{}, F16{});
}
#endif
#ifdef __bf16__
else if(data_type == GemmDataType::BF16_BF16_BF16 && layout == GemmMatrixLayout::MK_KN_MN)
{