[CK][CONV] Support NCHW in class DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle (#2375)

1. When conv spec is 1x1 stride1 pad0, nchw is equal with matrix A + column major, we only need minor change in conv transformer to support it.
2. when out is NKHW, it is equal with matrix C with column major. we need swap A & B to get best performance.
3. Add new instance device_grouped_conv_fwd_xdl_f16_nchw_instances for nchw.
This commit is contained in:
linqunAMD
2025-06-26 08:32:39 +08:00
committed by GitHub
parent a14753b86f
commit 1749c0409e
6 changed files with 552 additions and 137 deletions

View File

@@ -53,6 +53,15 @@ void add_device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard(
ConvFwd1x1S1P0>,
Shards,
ShardIndex>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<device_grouped_conv_fwd_xdl_f16_nchw_instances<2,
NGCHW,
GKCYX,
Empty_Tuple,
NGKHW,
ConvFwd1x1S1P0>,
Shards,
ShardIndex>{});
}
} // namespace ck::tensor_operation::device::instance

View File

@@ -31,6 +31,14 @@ void add_device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_f16_instances(
Empty_Tuple,
NGKHW,
ConvFwdDefault>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_f16_nchw_instances<2,
NGCHW,
GKYXC,
Empty_Tuple,
NGKHW,
ConvFwd1x1S1P0>{});
}
} // namespace instance

View File

@@ -47,6 +47,15 @@ void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_16x16_instances(
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_f16_nchw_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0>{});
}
} // namespace instance