[rocm-libraries] ROCm/rocm-libraries#5114 (commit 59b8cb5)

[CK][CK Tile] Improvements for grouped conv fwd tile
 profiling (#5114)

## Motivation

Improve profiling for grouped convolution forward for better comparison
between CK and CK Tile
## Technical Details

- Include preprocessing time for ck tile
- Add flush cache for conv fwd profiler
- Switch configs to builder reflect
- Add KPerXdl deduce
- Add non-grouped ported instances

## Test Plan

test_grouped_convnd_fwd_tile

## Test Result

pass

## Submission Checklist

- [x] Look over the contributing guidelines at
https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.

AICK-786
This commit is contained in:
Bartłomiej Kocot
2026-03-11 22:39:20 +00:00
committed by assistant-librarian[bot]
parent c1f2d8166d
commit 2169367735
24 changed files with 2375 additions and 1874 deletions

View File

@@ -1,264 +1,239 @@
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Default, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Default, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
# LargeTensor is temporary disable due to failures
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 32, Default, 32, 32, 4, 2, 2, 2, 2, 1, 1>
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Default, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Filter1x1Pad0, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Filter1x1Stride1Pad0, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Default, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Filter1x1Pad0, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Filter1x1Stride1Pad0, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 64, 64, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 64, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 64, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,8),2,bf16,bf16,Default>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,bf16,bf16,false,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,bf16,bf16,false,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,64,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,64,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,64,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,64,64,64,8,8,32,32,1,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,64,64,64,8,8,32,32,1,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,64,64,64,8,8,32,32,1,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,bf16,bf16,false,1>

View File

@@ -1,255 +1,230 @@
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Default, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Default, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
# LargeTensor is temporary disable due to failures
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 32, Default, 32, 32, 4, 2, 2, 2, 2, 1, 1>
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Default, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Filter1x1Pad0, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Filter1x1Stride1Pad0, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Default, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Filter1x1Pad0, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Filter1x1Stride1Pad0, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,8),2,fp16,fp16,Default>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,fp16,fp16,false,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,fp16,fp16,false,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>

View File

@@ -1,177 +1,176 @@
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Default, 32, 32, 2, 1, 4, 4, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Default, 32, 32, 2, 2, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 16, Default, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 16, Default, 32, 32, 2, 4, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 16, Default, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Default, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 16, Default, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 16, Default, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Default, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 16, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 16, Default, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 16, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 16, Default, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 16, Default, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 192, 16, Default, 32, 32, 2, 3, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 16, Filter1x1Pad0, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 16, Filter1x1Pad0, 32, 32, 2, 4, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 16, Filter1x1Pad0, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 16, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 16, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 16, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 16, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 16, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 16, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 16, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 192, 16, Filter1x1Pad0, 32, 32, 2, 3, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 16, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 16, Filter1x1Stride1Pad0, 32, 32, 2, 4, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 16, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 16, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 16, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 16, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 16, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 16, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 192, 16, Filter1x1Stride1Pad0, 32, 32, 2, 3, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
# LargeTensor is temporary disable due to failures
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 16, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 16, Default, 32, 32, 4, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Default, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,192,16,4,4,32,32,2,3,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,192,16,4,4,32,32,2,3,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,192,16,4,4,32,32,2,3,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>

View File

@@ -1,264 +1,329 @@
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Default, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Default, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
# LargeTensor is temporary disable due to failures
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 32, Default, 32, 32, 4, 2, 2, 2, 2, 1, 1>
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v5>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 64, 64, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v5>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 64, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v5>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 64, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Default, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Default, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v5>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Filter1x1Pad0, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Filter1x1Pad0, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v5>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Filter1x1Stride1Pad0, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Filter1x1Stride1Pad0, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v5>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,8,4,4,32,32,2,1,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,64,8,4,4,32,32,4,1,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,8,4,4,32,32,2,2,Seq(2,16,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,16,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,64,8,4,4,32,32,1,2,Seq(2,16,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,16,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,8),2,bf16,bf16,Default>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,bf16,bf16,false,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,bf16,bf16,false,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,64,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,64,64,64,8,8,32,32,1,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,64,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,64,64,64,8,8,32,32,1,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,64,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,64,64,64,8,8,32,32,1,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,64,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,64,64,64,8,8,32,32,1,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,bf16,bf16,false,1>

View File

@@ -1,255 +1,317 @@
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Default, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Default, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
# LargeTensor is temporary disable due to failures
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 32, Default, 32, 32, 4, 2, 2, 2, 2, 1, 1>
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Default, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Default, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Filter1x1Pad0, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Filter1x1Pad0, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Filter1x1Stride1Pad0, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Filter1x1Stride1Pad0, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,8,4,4,32,32,2,1,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,64,8,4,4,32,32,4,1,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,8,4,4,32,32,2,2,Seq(2,16,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,16,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,64,8,4,4,32,32,1,2,Seq(2,16,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,16,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,8),2,fp16,fp16,Default>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,fp16,fp16,false,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,fp16,fp16,false,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>

View File

@@ -1,177 +1,229 @@
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Default, 32, 32, 2, 1, 4, 4, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Default, 32, 32, 2, 2, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 16, Default, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 16, Default, 32, 32, 2, 4, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 16, Default, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Default, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 16, Default, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 16, Default, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Default, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 16, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 16, Default, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 16, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 16, Default, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 16, Default, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 192, 16, Default, 32, 32, 2, 3, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 16, Filter1x1Pad0, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 16, Filter1x1Pad0, 32, 32, 2, 4, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 16, Filter1x1Pad0, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 16, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 16, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 16, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 16, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 16, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 16, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 16, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 192, 16, Filter1x1Pad0, 32, 32, 2, 3, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 16, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 16, Filter1x1Stride1Pad0, 32, 32, 2, 4, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 16, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 16, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 16, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 16, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 16, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 16, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 192, 16, Filter1x1Stride1Pad0, 32, 32, 2, 3, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
# LargeTensor is temporary disable due to failures
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 16, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 16, Default, 32, 32, 4, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Default, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,192,16,4,4,32,32,2,3,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,192,16,4,4,32,32,2,3,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,192,16,4,4,32,32,2,3,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,192,16,4,4,32,32,2,3,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>

View File

@@ -1,50 +1,47 @@
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
# LargeTensor is temporary disable due to failures
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>

View File

@@ -1,50 +1,46 @@
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
# LargeTensor is temporary disable due to failures
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>

View File

@@ -1,43 +1,35 @@
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
# LargeTensor is temporary disable due to failures
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 32, Default, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>

View File

@@ -1,50 +1,65 @@
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
# LargeTensor is temporary disable due to failures
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,8,4,4,32,32,2,1,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,bf16,bf16,false,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,64,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>

View File

@@ -1,50 +1,63 @@
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
# LargeTensor is temporary disable due to failures
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,8,4,4,32,32,2,1,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,fp16,fp16,false,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>

View File

@@ -1,43 +1,45 @@
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
# LargeTensor is temporary disable due to failures
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 32, Default, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp32,fp32,Default,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,8>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,32>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>