mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-02 04:31:25 +00:00
Add new instances for merging multiple fwd conv groups into a single GEMM batch. Allow group merging for C > 1 when vector load/store size is 1 for the output tensor. (#3639)
Co-authored-by: Ville Pietilä <>
This commit is contained in:
@@ -1513,7 +1513,7 @@ struct DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle
|
||||
|
||||
if constexpr(NumGroupsToMerge > 1)
|
||||
{
|
||||
if(!(C == 1))
|
||||
if(!(C == 1) && CDEBlockTransferScalarPerVector_NPerBlock > 1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user