mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 14:29:05 +00:00
ck-builder: add remaining ck factory tests (#3223)
Now that the remaining reflection has been implemented, we can add the remaining factory tests too. This is the complete set of instances for forward grouped conv currently in CK.
This commit is contained in:
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -55,7 +55,54 @@ struct Bilinear_F32
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -66,7 +113,54 @@ struct Bilinear_F32_TF32
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,8,1,8),1,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,8,1,8),1,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,8,1,8),1,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32),fp32,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -77,7 +171,54 @@ struct Bilinear_F16
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16),fp16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -88,7 +229,54 @@ struct Bilinear_BF16
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16),bf16,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -99,7 +287,54 @@ struct Bilinear_INT8
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,s8,s8,s32,s8,Tuple(s8),s8,PassThrough,PassThrough,Bilinear,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -76,7 +76,54 @@ struct F8_ConvScale
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -94,7 +141,54 @@ struct F8_BF8_comb1_ConvScale
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,bf8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,bf8,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -112,7 +206,54 @@ struct F8_BF8_comb2_ConvScale
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,bf8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,bf8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,bf8,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -130,7 +271,54 @@ struct F8_BF8_comb3_ConvScale
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,bf8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf8,fp8,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -148,7 +336,54 @@ struct F8_float_CombConvScale
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -166,7 +401,54 @@ struct F8_ConvScaleRelu
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvScaleRelu,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -184,7 +466,54 @@ struct F8_CombConvScaleRelu
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,UnaryCombinedOp,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -202,7 +531,54 @@ struct F8_ConvScaleAdd
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK),NDHWGK,fp8,fp8,fp32,fp32,Tuple(fp32),fp8,PassThrough,PassThrough,ConvScaleAdd,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -220,7 +596,54 @@ struct F8_ConvInvscale
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp8,fp8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp8,fp8,fp32,fp32,EmptyTuple,fp8,PassThrough,PassThrough,ConvInvscale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp8,fp8,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
|
||||
@@ -85,7 +85,9 @@ struct DyOp_F32_2
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -96,7 +98,9 @@ struct DyOp_F32_3
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -107,7 +111,9 @@ struct DyOp_F16_2
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -118,7 +124,9 @@ struct DyOp_F16_3
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -129,7 +137,9 @@ struct DyOp_BF16_2
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -140,7 +150,9 @@ struct DyOp_BF16_3
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -151,7 +163,9 @@ struct DyOp_INT8_2
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -162,7 +176,9 @@ struct DyOp_INT8_3
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,DynamicUnaryOp,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
|
||||
@@ -53,7 +53,54 @@ struct F32
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -64,7 +111,54 @@ struct F32_TF32
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,8,1,8),1,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,8,1,8),1,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,8,1,8),1,tf32,tf32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),4,tf32,tf32,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -75,7 +169,54 @@ struct F16
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -86,7 +227,54 @@ struct BF16
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -97,7 +285,54 @@ struct S8
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,s8,s8,s32,s8,EmptyTuple,s8,PassThrough,PassThrough,Scale,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),8,s8,s8,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
|
||||
@@ -53,7 +53,18 @@ struct F32
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp32,fp32),Tuple(fp32,fp32),fp32,fp32,EmptyTuple,fp32,ScaleAdd,ScaleAdd,PassThrough,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp32,fp32),Tuple(fp32,fp32),fp32,fp32,EmptyTuple,fp32,ScaleAdd,ScaleAdd,PassThrough,Default,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp32,fp32),Tuple(fp32,fp32),fp32,fp32,EmptyTuple,fp32,ScaleAdd,ScaleAdd,PassThrough,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp32,fp32),Tuple(fp32,fp32),fp32,fp32,EmptyTuple,fp32,ScaleAdd,ScaleAdd,PassThrough,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp32,fp32),Tuple(fp32,fp32),fp32,fp32,EmptyTuple,fp32,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp32,fp32),Tuple(fp32,fp32),fp32,fp32,EmptyTuple,fp32,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp32,fp32),Tuple(fp32,fp32),fp32,fp32,EmptyTuple,fp32,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp32,fp32),Tuple(fp32,fp32),fp32,fp32,EmptyTuple,fp32,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp32,fp32),Tuple(fp32,fp32),fp32,fp32,EmptyTuple,fp32,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp32,fp32),Tuple(fp32,fp32),fp32,fp32,EmptyTuple,fp32,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp32,fp32),Tuple(fp32,fp32),fp32,fp32,EmptyTuple,fp32,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp32,fp32),Tuple(fp32,fp32),fp32,fp32,EmptyTuple,fp32,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -64,7 +75,18 @@ struct F16
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp16,fp16),Tuple(fp16,fp16),fp32,fp16,EmptyTuple,fp16,ScaleAdd,ScaleAdd,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp16,fp16),Tuple(fp16,fp16),fp32,fp16,EmptyTuple,fp16,ScaleAdd,ScaleAdd,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp16,fp16),Tuple(fp16,fp16),fp32,fp16,EmptyTuple,fp16,ScaleAdd,ScaleAdd,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp16,fp16),Tuple(fp16,fp16),fp32,fp16,EmptyTuple,fp16,ScaleAdd,ScaleAdd,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp16,fp16),Tuple(fp16,fp16),fp32,fp16,EmptyTuple,fp16,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp16,fp16),Tuple(fp16,fp16),fp32,fp16,EmptyTuple,fp16,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp16,fp16),Tuple(fp16,fp16),fp32,fp16,EmptyTuple,fp16,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp16,fp16),Tuple(fp16,fp16),fp32,fp16,EmptyTuple,fp16,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp16,fp16),Tuple(fp16,fp16),fp32,fp16,EmptyTuple,fp16,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp16,fp16),Tuple(fp16,fp16),fp32,fp16,EmptyTuple,fp16,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp16,fp16),Tuple(fp16,fp16),fp32,fp16,EmptyTuple,fp16,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(fp16,fp16),Tuple(fp16,fp16),fp32,fp16,EmptyTuple,fp16,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -75,7 +97,18 @@ struct BF16
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(bf16,bf16),Tuple(bf16,bf16),fp32,bf16,EmptyTuple,bf16,ScaleAdd,ScaleAdd,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(bf16,bf16),Tuple(bf16,bf16),fp32,bf16,EmptyTuple,bf16,ScaleAdd,ScaleAdd,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(bf16,bf16),Tuple(bf16,bf16),fp32,bf16,EmptyTuple,bf16,ScaleAdd,ScaleAdd,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(bf16,bf16),Tuple(bf16,bf16),fp32,bf16,EmptyTuple,bf16,ScaleAdd,ScaleAdd,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(bf16,bf16),Tuple(bf16,bf16),fp32,bf16,EmptyTuple,bf16,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(bf16,bf16),Tuple(bf16,bf16),fp32,bf16,EmptyTuple,bf16,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(bf16,bf16),Tuple(bf16,bf16),fp32,bf16,EmptyTuple,bf16,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(bf16,bf16),Tuple(bf16,bf16),fp32,bf16,EmptyTuple,bf16,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(bf16,bf16),Tuple(bf16,bf16),fp32,bf16,EmptyTuple,bf16,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(bf16,bf16),Tuple(bf16,bf16),fp32,bf16,EmptyTuple,bf16,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(bf16,bf16),Tuple(bf16,bf16),fp32,bf16,EmptyTuple,bf16,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(bf16,bf16),Tuple(bf16,bf16),fp32,bf16,EmptyTuple,bf16,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -86,7 +119,18 @@ struct S8
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(s8,s8),Tuple(s8,s8),s32,s8,EmptyTuple,s8,ScaleAdd,ScaleAdd,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(s8,s8),Tuple(s8,s8),s32,s8,EmptyTuple,s8,ScaleAdd,ScaleAdd,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(s8,s8),Tuple(s8,s8),s32,s8,EmptyTuple,s8,ScaleAdd,ScaleAdd,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(s8,s8),Tuple(s8,s8),s32,s8,EmptyTuple,s8,ScaleAdd,ScaleAdd,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(s8,s8),Tuple(s8,s8),s32,s8,EmptyTuple,s8,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(s8,s8),Tuple(s8,s8),s32,s8,EmptyTuple,s8,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(s8,s8),Tuple(s8,s8),s32,s8,EmptyTuple,s8,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(s8,s8),Tuple(s8,s8),s32,s8,EmptyTuple,s8,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(s8,s8),Tuple(s8,s8),s32,s8,EmptyTuple,s8,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(s8,s8),Tuple(s8,s8),s32,s8,EmptyTuple,s8,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(s8,s8),Tuple(s8,s8),s32,s8,EmptyTuple,s8,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,Tuple(s8,s8),Tuple(s8,s8),s32,s8,EmptyTuple,s8,ScaleAdd,ScaleAdd,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,1,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
|
||||
@@ -54,7 +54,18 @@ struct F32
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32,fp32),fp32,PassThrough,PassThrough,ScaleAddScaleAddRelu,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32,fp32),fp32,PassThrough,PassThrough,ScaleAddScaleAddRelu,Default,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32,fp32),fp32,PassThrough,PassThrough,ScaleAddScaleAddRelu,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32,fp32),fp32,PassThrough,PassThrough,ScaleAddScaleAddRelu,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32,fp32),fp32,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32,fp32),fp32,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32,fp32),fp32,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32,fp32),fp32,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32,fp32),fp32,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32,fp32),fp32,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32,fp32),fp32,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp32,fp32,fp32,fp32,Tuple(fp32,fp32),fp32,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,1,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -65,7 +76,18 @@ struct F16
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16,fp16),fp16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16,fp16),fp16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16,fp16),fp16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16,fp16),fp16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16,fp16),fp16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16,fp16),fp16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16,fp16),fp16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16,fp16),fp16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16,fp16),fp16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16,fp16),fp16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16,fp16),fp16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,fp16,fp16,fp32,fp16,Tuple(fp16,fp16),fp16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -76,7 +98,18 @@ struct BF16
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16,bf16),bf16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16,bf16),bf16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16,bf16),bf16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16,bf16),bf16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16,bf16),bf16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16,bf16),bf16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16,bf16),bf16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16,bf16),bf16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16,bf16),bf16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16,bf16),bf16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16,bf16),bf16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,bf16,bf16,fp32,bf16,Tuple(bf16,bf16),bf16,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
@@ -87,7 +120,18 @@ struct S8
|
||||
|
||||
constexpr static auto expected = {
|
||||
// clang-format off
|
||||
""
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,s8,s8,s32,s8,Tuple(fp32,fp32),s8,PassThrough,PassThrough,ScaleAddScaleAddRelu,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,s8,s8,s32,s8,Tuple(fp32,fp32),s8,PassThrough,PassThrough,ScaleAddScaleAddRelu,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,s8,s8,s32,s8,Tuple(fp32,fp32),s8,PassThrough,PassThrough,ScaleAddScaleAddRelu,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,s8,s8,s32,s8,Tuple(fp32,fp32),s8,PassThrough,PassThrough,ScaleAddScaleAddRelu,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,s8,s8,s32,s8,Tuple(fp32,fp32),s8,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,s8,s8,s32,s8,Tuple(fp32,fp32),s8,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,s8,s8,s32,s8,Tuple(fp32,fp32),s8,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,s8,s8,s32,s8,Tuple(fp32,fp32),s8,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,s8,s8,s32,s8,Tuple(fp32,fp32),s8,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,s8,s8,s32,s8,Tuple(fp32,fp32),s8,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,32,1,8),8,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,s8,s8,s32,s8,Tuple(fp32,fp32),s8,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>",
|
||||
"DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,Tuple(NDHWGK,G_K),NDHWGK,s8,s8,s32,s8,Tuple(fp32,fp32),s8,PassThrough,PassThrough,ScaleAddScaleAddRelu,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,1,1,1,Seq(1,16,1,4),1,s8,s8,Default,1>"
|
||||
// clang-format on
|
||||
};
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user