[rocm-libraries] ROCm/rocm-libraries#5516 (commit ff3afda)

[CK_TILE, CK_BUILDER] Add bwd data to CK Tile profiler
 (#5516)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Motivation

We want close the performance gap between old CK and CK Tile for bwd
data convolutions. To achieve this, we need tow things

- Configurations for the old CK kernel instances such that we can map
them into CK Tile instances.
- Support in CK profiler to run the CK Tile instance with the same API
as for old CK instances.

## Technical Details

Extracted kernel configurations from old CK. The codegen python script
for CK Tile convs is extended to support also bwd data. The generated
instances are added to the CMake build (target
`device_grouped_conv_bwd_data_tile_instances`).
A new profiler op (`grouped_conv_bwd_data_tile`) has been added to the
CK Profiler. The API is same as for old CK's profiler op
`grouped_conv_bwd_data`.
This commit is contained in:
Ville Pietilä
2026-03-25 14:36:11 +00:00
committed by assistant-librarian[bot]
parent 1834e318da
commit ec2dbfbfde
29 changed files with 1588 additions and 956 deletions

View File

@@ -31,6 +31,11 @@ if(GPU_TARGETS MATCHES "gfx9")
add_instance_library(device_grouped_conv_bwd_weight_tile_instances ${GROUPED_CONV_BWD_WEIGHT_TILE})
target_include_directories(device_grouped_conv_bwd_weight_tile_instances PRIVATE
"${PROJECT_SOURCE_DIR}/experimental/builder/test/utils")
target_compile_options(device_grouped_conv_bwd_weight_tile_instances PRIVATE -DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=0)
file(GLOB_RECURSE GROUPED_CONV_BWD_DATA_TILE "${CMAKE_CURRENT_BINARY_DIR}/backward_data/*.cpp")
add_instance_library(device_grouped_conv_bwd_data_tile_instances ${GROUPED_CONV_BWD_DATA_TILE})
target_include_directories(device_grouped_conv_bwd_data_tile_instances PRIVATE
"${PROJECT_SOURCE_DIR}/experimental/builder/test/utils")
target_compile_options(device_grouped_conv_bwd_data_tile_instances PRIVATE -DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=0)
endif()

View File

@@ -1,82 +1,82 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>

View File

@@ -1,82 +1,82 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>

View File

@@ -1,70 +1,70 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>

View File

@@ -1,82 +1,82 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>

View File

@@ -1,82 +1,82 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>

View File

@@ -1,70 +1,70 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>

View File

@@ -1,16 +1,16 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>

View File

@@ -1,16 +1,16 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>

View File

@@ -1,14 +1,14 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>

View File

@@ -1,16 +1,16 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>

View File

@@ -1,16 +1,16 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>

View File

@@ -1,14 +1,14 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>

View File

@@ -144,6 +144,7 @@ def copy_includes(instances_path):
output_dir.mkdir(parents=True, exist_ok=True)
shutil.copy(f"{inc_dir}/include/instance_includes.inc", instances_path)
shutil.copy(f"{inc_dir}/include/instance_run.inc", instances_path)
shutil.copy(f"{inc_dir}/include/signatures.hpp", instances_path)
def generate_calls_inc(instances, problem_name, direction, filter_pattern):
generate_dir = Path(__file__).resolve().parent
@@ -467,8 +468,131 @@ def parse_bwd_weight_instances(instances, problem_name):
def parse_bwd_data_instances(instances, problem_name):
convs = []
print("Parsing backward data instances is not supported yet, skipping all instances.")
# TODO: Implement parsing logic for backward data instances.
for instance_id, instance in enumerate(instances):
if instance.find("#") != -1 or instance.find(";") != -1:
continue
start = instance.index('<') + 1
end = instance.rindex('>')
params_str = instance[start:end]
args = parse_instance_string(params_str)
is_v1_instance = instance.find("Xdl_CShuffle<") != -1
if is_v1_instance:
if len(args) != 51:
raise RuntimeError(f"Wrong number of parameters in the V1 XDL CShuffle instance string: {instance}\n" +
f"Expected 51 parameters for V1 instance. Found {len(args)} parameters.")
else:
raise RuntimeError(f"Only V1 XDL CShuffle instances are supported for backward data. Found instance: {instance}")
spec = args[13]
block_size = int(args[17])
m_per_block = int(args[18])
n_per_block = int(args[19])
k_per_block = int(args[20])
ak1 = int(args[21])
bk1 = int(args[22])
m_per_xdl = int(args[23])
n_per_xdl = int(args[24])
m_xdl_per_wave = int(args[25])
n_xdl_per_wave = int(args[26])
a_scalar_per_vector = int(args[31])
b_scalar_per_vector = int(args[38])
c_scalar_per_vector = int(args[44])
if ak1 != bk1:
raise RuntimeError(f"Not supported instance {instance_id} since ak1 != bk1. ak1: {ak1}, bk1: {bk1} in instance: {instance}")
k1 = min(ak1, bk1)
# TODO: Do we need split image for 3D bwd data convs?
split_image = False
# Default optimization parameters
num_groups_to_merge = 1
is_two_stage_instance = False
is_explicit_gemm = False
num_wave_groups = 1
direct_load = False
# Block GEMM pipeline parameters
block_gemm_pipeline_scheduler = args[46]
if block_gemm_pipeline_scheduler == "Default":
block_gemm_pipeline_scheduler = "Intrawave"
blk_gemm_pipeline_version = "v1"
if block_gemm_pipeline_scheduler == "Interwave":
blk_gemm_pipeline_version = "v1"
# Sanity check for Block GEMM pipeline parameters
# Scheduler must be either Intrawave or Interwave.
# Version must be from v1 to v5
if block_gemm_pipeline_scheduler not in ["Intrawave", "Interwave"]:
raise RuntimeError(f"Invalid Block GEMM pipeline scheduler: {block_gemm_pipeline_scheduler} in instance: {instance}")
if blk_gemm_pipeline_version not in ["v1", "v2", "v3", "v4", "v5"]:
raise RuntimeError(f"Invalid Block GEMM pipeline version: {blk_gemm_pipeline_version} in instance: {instance}")
double_smem_buffer = blk_gemm_pipeline_version == "v4"
scheduler = block_gemm_pipeline_scheduler
pipeline_version = blk_gemm_pipeline_version.upper()
# Old CK pipeline version V5 maps to V6 for CK Tile
if pipeline_version == "V5":
pipeline_version = "V6"
if direct_load:
if pipeline_version == "V1":
pipeline_version = "ASYNC_V1"
elif pipeline_version == "V4":
pipeline_version = "ASYNC_V4"
else:
raise RuntimeError(
f"Not supported pipeline for direct load: pipeline_version={pipeline_version} in instance: {instance}"
)
m_warp = int(m_per_block / (m_per_xdl * m_xdl_per_wave))
n_warp = int(n_per_block / (n_per_xdl * n_xdl_per_wave))
warp_size = 64
k_warp = int(block_size / (warp_size * m_warp * n_warp))
dtype = get_dtype(problem_name)
k_per_xdl = max(k1, get_k_mfma(dtype, m_per_xdl, n_per_xdl))
if check_vectors(a_scalar_per_vector, b_scalar_per_vector, c_scalar_per_vector) == False:
print(f"Skipping instance {instance_id} with irregular load since it's not supported yet.")
continue
if pipeline_version == "V6":
print(f"Skipping instance {instance_id} with V6 since it's not supported yet.")
continue
# Check vector sizes for A and B tensors - we cannot oversubscribe.
num_tile_elements_a = m_per_xdl * k_per_xdl
num_tile_elements_b = n_per_xdl * k_per_xdl
max_vector_size_a = max(1, num_tile_elements_a // block_size)
max_vector_size_b = max(1, num_tile_elements_b // block_size)
a_scalar_per_vector = min(a_scalar_per_vector, max_vector_size_a)
b_scalar_per_vector = min(b_scalar_per_vector, max_vector_size_b)
conv = ConvInstanceTemplateParams(
spec,
[m_per_block, n_per_block, k_per_block],
[m_warp, n_warp, k_warp],
[m_per_xdl, n_per_xdl, k_per_xdl],
double_smem_buffer,
num_wave_groups,
is_two_stage_instance,
pipeline_version,
scheduler,
[a_scalar_per_vector, b_scalar_per_vector, c_scalar_per_vector],
num_groups_to_merge,
split_image,
is_explicit_gemm,
instance_id,
)
convs.append(conv)
return convs
def generate_instances_fwd(instances, problem_name, config, filter_pattern, instances_path):

View File

@@ -1,177 +1,8 @@
#include "../../builder/test/utils/ckb_conv_tile_test_configs.hpp"
#include "ck_tile/builder/testing/conv/fwd.hpp"
#include "ck_tile/builder/testing/conv/bwd_weight.hpp"
#include "ck_tile/builder/testing/conv/ck_tile.hpp"
#include "ck_tile/builder/testing/conv/bwd_data.hpp"
#include "signatures.hpp"
namespace ckb = ck_tile::builder;
namespace ckt = ck_tile::builder::test;
namespace cku = ck_tile::builder::test_utils;
namespace ckf = ck_tile::builder::factory;
namespace ck_tile::builder::profiling {
constexpr auto SIGNATURE_NHWGC_FP32_FWD =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_BF16_FWD =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_FP16_FWD =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP32_FWD =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_BF16_FWD =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP16_FWD =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
// Backward Weight Signatures
constexpr auto SIGNATURE_NHWGC_FP32_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_BF16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_FP16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP32_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_BF16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
// Backward Data Signatures
constexpr auto SIGNATURE_NHWGC_FP32_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_BF16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_FP16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP32_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_BF16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
} // namespace ck_tile::builder::profiling

View File

@@ -0,0 +1,186 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#pragma once
#include <tuple>
#include "../../builder/test/impl/conv_signature_types.hpp"
#include "ck_tile/builder/testing/conv/ck_tile.hpp"
namespace ck_tile::builder::profiling {
namespace ckb = ck_tile::builder;
namespace ckt = ck_tile::builder::test;
constexpr auto SIGNATURE_NHWGC_FP32_FWD =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_BF16_FWD =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_FP16_FWD =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP32_FWD =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_BF16_FWD =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP16_FWD =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
/////////////////////////////////////////
// BWD WEIGHT signatures
//////////////////////////////////////////
constexpr auto SIGNATURE_NHWGC_BF16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_FP16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_FP32_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NDHWGC_BF16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP32_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
/////////////////////////////////////////
// BWD DATA signatures
//////////////////////////////////////////
constexpr auto SIGNATURE_NHWGC_BF16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_FP16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_FP32_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NDHWGC_BF16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP32_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
} // namespace ck_tile::builder::profiling

View File

@@ -50,6 +50,7 @@ def compile_single_file(cpp_file: Path, project_root: Path, gpu_target: str, ver
"-D__HIP_PLATFORM_AMD__",
"-D CK_EXPERIMENTAL_BUILDER=ON",
"-O3",
"-Wno-unknown-warning-option",
*include_flags,
str(cpp_file),
"-o", str(output_file)
@@ -63,10 +64,15 @@ def compile_single_file(cpp_file: Path, project_root: Path, gpu_target: str, ver
timeout=300 # 5 minute timeout per file
)
print(f"\n\n Command: {' '.join(cmd)}\n") if verbose else None
if result.returncode == 0:
return True, ""
else:
# Extract the key error message
if verbose and result.stderr:
print(f" {result.stderr}")
print()
error_output = result.stderr
return False, error_output