mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
[rocm-libraries] ROCm/rocm-libraries#5114 (commit 59b8cb5)
[CK][CK Tile] Improvements for grouped conv fwd tile profiling (#5114) ## Motivation Improve profiling for grouped convolution forward for better comparison between CK and CK Tile ## Technical Details - Include preprocessing time for ck tile - Add flush cache for conv fwd profiler - Switch configs to builder reflect - Add KPerXdl deduce - Add non-grouped ported instances ## Test Plan test_grouped_convnd_fwd_tile ## Test Result pass ## Submission Checklist - [x] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests. AICK-786
This commit is contained in:
committed by
assistant-librarian[bot]
parent
c1f2d8166d
commit
2169367735
@@ -68,7 +68,14 @@ mkdir -p forward/tests
|
||||
mkdir -p backward_weight/tests
|
||||
mkdir -p backward_data/tests
|
||||
|
||||
# Do not change the existing fwd test configs
|
||||
# For FWD, generate new test configs by taking 20% of the profiler configs for each data type and layout
|
||||
for layout in nhwgc ndhwgc; do
|
||||
for dtype in fp32 fp16 bf16; do
|
||||
profiler_config="forward/profiler/${layout}_${dtype}.conf"
|
||||
test_config="forward/tests/${layout}_${dtype}.conf"
|
||||
awk 'NR % 5 == 0' $profiler_config > $test_config # 20% of lines in the profiler configs
|
||||
done
|
||||
done
|
||||
|
||||
# For BWD weight, generate new test configs by taking 20% of the profiler configs for each data type and layout
|
||||
for layout in nhwgc ndhwgc; do
|
||||
|
||||
@@ -1,264 +1,239 @@
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Default, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Default, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
# LargeTensor is temporary disable due to failures
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 32, Default, 32, 32, 4, 2, 2, 2, 2, 1, 1>
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Default, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Filter1x1Pad0, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Filter1x1Stride1Pad0, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Default, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Filter1x1Pad0, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Filter1x1Stride1Pad0, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 64, 64, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 64, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 64, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,8),2,bf16,bf16,Default>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,bf16,bf16,false,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,bf16,bf16,false,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,64,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,64,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,64,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,64,64,64,8,8,32,32,1,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,64,64,64,8,8,32,32,1,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,64,64,64,8,8,32,32,1,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,bf16,bf16,false,1>
|
||||
|
||||
@@ -1,255 +1,230 @@
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Default, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Default, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
# LargeTensor is temporary disable due to failures
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 32, Default, 32, 32, 4, 2, 2, 2, 2, 1, 1>
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Default, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Filter1x1Pad0, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Filter1x1Stride1Pad0, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Default, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Filter1x1Pad0, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Filter1x1Stride1Pad0, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,8),2,fp16,fp16,Default>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,fp16,fp16,false,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,fp16,fp16,false,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>
|
||||
|
||||
@@ -1,177 +1,176 @@
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Default, 32, 32, 2, 1, 4, 4, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Default, 32, 32, 2, 2, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 16, Default, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 16, Default, 32, 32, 2, 4, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 16, Default, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Default, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 16, Default, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 16, Default, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Default, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 16, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 16, Default, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 16, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 16, Default, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 16, Default, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 192, 16, Default, 32, 32, 2, 3, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 16, Filter1x1Pad0, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 16, Filter1x1Pad0, 32, 32, 2, 4, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 16, Filter1x1Pad0, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 16, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 16, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 16, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 16, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 16, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 16, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 16, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 192, 16, Filter1x1Pad0, 32, 32, 2, 3, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 16, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 16, Filter1x1Stride1Pad0, 32, 32, 2, 4, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 16, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 16, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 16, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 16, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 16, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 16, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 192, 16, Filter1x1Stride1Pad0, 32, 32, 2, 3, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
# LargeTensor is temporary disable due to failures
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 16, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 16, Default, 32, 32, 4, 2, 4, 4, 4, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Default, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,192,16,4,4,32,32,2,3,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,192,16,4,4,32,32,2,3,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,192,16,4,4,32,32,2,3,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
|
||||
|
||||
@@ -1,264 +1,329 @@
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Default, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Default, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
# LargeTensor is temporary disable due to failures
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 32, Default, 32, 32, 4, 2, 2, 2, 2, 1, 1>
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v5>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 64, 64, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v5>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 64, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v5>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 64, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 64, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Default, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Default, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v5>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Filter1x1Pad0, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Filter1x1Pad0, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v5>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Filter1x1Stride1Pad0, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Filter1x1Stride1Pad0, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v5>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,8,4,4,32,32,2,1,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,64,8,4,4,32,32,4,1,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,8,4,4,32,32,2,2,Seq(2,16,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,16,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,64,8,4,4,32,32,1,2,Seq(2,16,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,16,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,8),2,bf16,bf16,Default>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,bf16,bf16,false,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,bf16,bf16,false,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,64,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,64,64,64,8,8,32,32,1,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,64,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,64,64,64,8,8,32,32,1,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,64,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,64,64,64,8,8,32,32,1,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,64,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,64,64,64,8,8,32,32,1,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,bf16,bf16,false,1>
|
||||
|
||||
@@ -1,255 +1,317 @@
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Default, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Default, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 32, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, 1>
|
||||
# LargeTensor is temporary disable due to failures
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 32, Default, 32, 32, 4, 2, 2, 2, 2, 1, 1>
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Default, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Default, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Default, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Filter1x1Pad0, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Filter1x1Pad0, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Filter1x1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 32, 32, 4, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 224, 256, 64, Filter1x1Stride1Pad0, 16, 16, 7, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 224, 64, Filter1x1Stride1Pad0, 16, 16, 8, 7, 8, 8, 8, 2, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 128, 32, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Default, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 16, 256, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 32, 256, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 64, 64, Default, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 128, 64, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,8,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,2,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,8,4,4,32,32,2,1,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,64,8,4,4,32,32,4,1,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,8,4,4,32,32,2,2,Seq(2,16,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,16,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,64,8,4,4,32,32,1,2,Seq(2,16,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,16,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,8),2,fp16,fp16,Default>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,fp16,fp16,false,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,fp16,fp16,false,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,16,256,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>
|
||||
|
||||
@@ -1,177 +1,229 @@
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Default, 32, 32, 2, 1, 4, 4, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Default, 32, 32, 2, 2, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 16, Default, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 16, Default, 32, 32, 2, 4, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 16, Default, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Default, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 16, Default, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 16, Default, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Default, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 16, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 16, Default, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 16, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 16, Default, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 16, Default, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 192, 16, Default, 32, 32, 2, 3, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 16, Filter1x1Pad0, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 16, Filter1x1Pad0, 32, 32, 2, 4, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 16, Filter1x1Pad0, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 16, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 16, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 16, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 16, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 16, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 16, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 16, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 192, 16, Filter1x1Pad0, 32, 32, 2, 3, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 256, 128, 16, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 256, 16, Filter1x1Stride1Pad0, 32, 32, 2, 4, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 128, 16, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 128, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 64, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 64, 128, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 16, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 64, 16, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 128, 16, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 128, 32, 16, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<128, 32, 128, 16, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 32, 16, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 16, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 128, 192, 16, Filter1x1Stride1Pad0, 32, 32, 2, 3, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 1, 2, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 4, 4, 4, 1, 1, 1>
|
||||
# LargeTensor is temporary disable due to failures
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 16, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<256, 256, 128, 16, Default, 32, 32, 4, 2, 4, 4, 4, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Default, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Default, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Default, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Default, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Default, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Default, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Default, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Default, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Default, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Pad0, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Pad0, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Pad0, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Pad0, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Pad0, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Pad0, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 128, 16, 64, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 32, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 64, 16, 64, Filter1x1Stride1Pad0, 16, 16, 2, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 64, 64, Filter1x1Stride1Pad0, 16, 16, 1, 2, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 64, 64, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 128, 64, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 64, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,192,16,4,4,32,32,2,3,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,192,16,4,4,32,32,2,3,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,192,16,4,4,32,32,2,3,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,64,128,16,4,4,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,128,192,16,4,4,32,32,2,3,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
|
||||
|
||||
@@ -1,50 +1,47 @@
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
# LargeTensor is temporary disable due to failures
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
|
||||
@@ -1,50 +1,46 @@
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
# LargeTensor is temporary disable due to failures
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,32,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,32,256,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,16),8,Interwave,v2,fp16,fp16,false,1>
|
||||
|
||||
@@ -1,43 +1,35 @@
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
# LargeTensor is temporary disable due to failures
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 32, Default, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<3,NDHWGC,GKZYXC,EmptyTuple,NDHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
|
||||
@@ -1,50 +1,65 @@
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
# LargeTensor is temporary disable due to failures
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,8,4,4,32,32,2,1,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,bf16,bf16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,bf16,bf16,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,bf16,bf16,false,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,64,128,64,8,8,32,32,1,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,64,64,8,8,32,32,2,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v1,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,16,64,8,8,16,16,4,1,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,bf16,bf16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,bf16,bf16,fp32,bf16,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,bf16,bf16,false,1>
|
||||
|
||||
@@ -1,50 +1,63 @@
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
# LargeTensor is temporary disable due to failures
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Default, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 32, 64, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Default, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
# DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<64, 16, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3_DirectLoad<256, 64, 64, 64, Filter1x1Stride1Pad0, 16, 16, 2, 2, 2, 2, 4, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,64,32,8,8,32,32,2,2,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,128,128,32,32,8,8,32,32,2,1,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(4,4,8),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,64,8,4,4,32,32,2,1,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,Seq(2,32,4),Seq(1,0,2),Seq(1,0,2),2,1,1,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,16,1,8),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,1,1,Seq(1,32,1,4),8,fp16,fp16,Default,1>
|
||||
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp16,fp16,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,256,128,32,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,1,1,Seq(1,32,1,4),4,Interwave,v1,fp16,fp16,false,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,224,256,64,8,8,16,16,7,8,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,256,256,32,8,8,32,32,4,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,256,224,64,8,8,16,16,8,7,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,2,1,Seq(1,64,1,4),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,256,256,32,8,8,16,16,8,8,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,2,Seq(1,32,1,8),8,Intrawave,v3,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,32,1,8),8,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp16,fp16,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp16,fp16,fp32,fp16,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp16,fp16,false,1>
|
||||
|
||||
@@ -1,43 +1,45 @@
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Default, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 64, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 32, 64, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 8, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Default, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<256, 64, 64, 32, Filter1x1Stride1Pad0, 16, 16, 2, 2, 1, 2, 1, 1, 1, 1>
|
||||
# LargeTensor is temporary disable due to failures
|
||||
# DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor<64, 64, 64, 32, Default, 32, 32, 2, 2, 1, 1, 1, 1, 1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Default, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 16>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<64, 64, 16, 16, Filter3x3, 16, 16, 4, 1, 4, 1, 1, 1, 1, 32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 32, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v4>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Default, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 256, 256, 32, Filter1x1Stride1Pad0, 16, 16, 8, 8, 8, 8, 8, 1, 2, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v3>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Default, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 128, 64, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v6>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Default, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<256, 128, 256, 32, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 8, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Default, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 16, 32, 64, Default, 16, 16, 1, 1, 8, 8, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 16, 64, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 2, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Default, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<64, 16, 16, 128, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 4, 4, 1, 1, BlkGemmPipelineScheduler: Interwave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 32, Default, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 32, Filter1x1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<128, 32, 128, 32, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 8, 1, 1, BlkGemmPipelineScheduler: Intrawave, BlkGemmPipelineVersion: v2>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,128,256,16,4,4,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,32,16,4,4,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,256,128,128,16,4,4,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,64,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,1,128,128,32,16,4,4,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,64,64,16,4,4,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,8,1,8),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,128,128,128,16,4,4,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,128,64,16,4,4,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,64,32,64,16,4,4,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,8),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,256,256,128,16,4,4,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,16,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,64,128,16,4,4,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,1,128,32,128,16,4,4,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,1,1,Seq(1,8,1,16),4,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,1,1,Seq(1,32,1,4),2,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,1,256,64,64,32,8,8,16,16,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,2,8,true,1,1,Seq(1,32,1,4),1,fp32,fp32,Default,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,8>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter3x3,MNKPadding,1,64,64,16,16,4,4,16,16,4,1,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,1,1,Seq(1,16,1,4),1,fp32,fp32,Default,32>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v4,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v3,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,256,128,128,64,8,8,32,32,2,2,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,32,1,8),8,Intrawave,v5,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,128,16,64,8,8,16,16,4,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,32,128,64,8,8,32,32,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Intrawave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,128,32,64,8,8,32,32,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,MNKPadding,128,16,128,64,8,8,16,16,1,4,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,16,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Pad0,MNKPadding,128,32,64,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),8,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,64,16,16,64,8,8,16,16,1,1,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,8,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,64,16,64,8,8,16,16,2,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),2,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,MNKPadding,128,16,64,64,8,8,16,16,1,2,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,64,16,16,128,8,8,16,16,1,1,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(16,4,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,4),4,Interwave,v1,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,64,32,64,8,8,32,32,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3<2,NHWGC,GKYXC,EmptyTuple,NHWGK,fp32,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,OddC,MNKPadding,128,16,32,64,8,8,16,16,1,1,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,Seq(8,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,false,1,1,Seq(1,16,1,8),4,Interwave,v2,fp32,fp32,false,1>
|
||||
|
||||
@@ -108,6 +108,33 @@ def check_vectors(a_scalar_per_vector, b_scalar_per_vector, c_scalar_per_vector)
|
||||
return False
|
||||
return True
|
||||
|
||||
def parse_instance_string(instance_string):
|
||||
"""Parse instance string, treating Seq(...) as a single parameter."""
|
||||
params = []
|
||||
current_param = ""
|
||||
paren_depth = 0
|
||||
|
||||
for char in instance_string:
|
||||
if char == '(':
|
||||
paren_depth += 1
|
||||
current_param += char
|
||||
elif char == ')':
|
||||
paren_depth -= 1
|
||||
current_param += char
|
||||
elif char == ',' and paren_depth == 0:
|
||||
# Only split on comma if we're not inside parentheses
|
||||
params.append(current_param.strip())
|
||||
current_param = ""
|
||||
else:
|
||||
current_param += char
|
||||
|
||||
# Add the last parameter
|
||||
if current_param.strip():
|
||||
params.append(current_param.strip())
|
||||
|
||||
return params
|
||||
|
||||
|
||||
def generate_calls_inc(instances, problem_name, direction, filter_pattern):
|
||||
generate_dir = Path(__file__).resolve().parent
|
||||
output_dir = Path(f"{generate_dir}/instances/{direction}")
|
||||
@@ -168,69 +195,80 @@ def parse_fwd_instances(instances, problem_name):
|
||||
for instance_id, instance in enumerate(instances):
|
||||
if instance.find("#") != -1 or instance.find(";") != -1:
|
||||
continue
|
||||
instance_args_list = instance[instance.find("<") + 1 : instance.find(">")]
|
||||
args = instance_args_list.split(", ")
|
||||
start = instance.index('<') + 1
|
||||
end = instance.rindex('>')
|
||||
params_str = instance[start:end]
|
||||
args = parse_instance_string(params_str)
|
||||
|
||||
block_size = int(args[0])
|
||||
m_per_block = int(args[1])
|
||||
n_per_block = int(args[2])
|
||||
k_per_block = int(args[3])
|
||||
spec = args[4]
|
||||
m_per_xdl = int(args[5])
|
||||
n_per_xdl = int(args[6])
|
||||
m_xdl_per_wave = int(args[7])
|
||||
n_xdl_per_wave = int(args[8])
|
||||
a_scalar_per_vector = int(args[9])
|
||||
b_scalar_per_vector = int(args[10])
|
||||
c_scalar_per_vector = int(args[11])
|
||||
if len(args) == 15:
|
||||
num_groups_to_merge = int(args[14])
|
||||
elif len(args) != 16 and len(args) != 14:
|
||||
raise RuntimeError("wrong number of parameters")
|
||||
is_v3_instance = instance.find("Xdl_CShuffle_V3") != -1
|
||||
split_image = instance.find("Large_Tensor") != -1
|
||||
|
||||
if is_v3_instance:
|
||||
spec = args[14]
|
||||
block_size = int(args[16])
|
||||
m_per_block = int(args[17])
|
||||
n_per_block = int(args[18])
|
||||
k_per_block = int(args[19])
|
||||
k1 = int(args[20])
|
||||
m_per_xdl = int(args[22])
|
||||
n_per_xdl = int(args[23])
|
||||
m_xdl_per_wave = int(args[24])
|
||||
n_xdl_per_wave = int(args[25])
|
||||
a_scalar_per_vector = int(args[30])
|
||||
b_scalar_per_vector = int(args[37])
|
||||
c_scalar_per_vector = int(args[43])
|
||||
scheduler = args[44]
|
||||
pipeline_version = args[45]
|
||||
direct_load = args[48] == "true"
|
||||
num_groups_to_merge = int(args[49])
|
||||
else:
|
||||
num_groups_to_merge = 1
|
||||
split_image = instance.find("Large") != -1
|
||||
double_smem_buffer = instance.find("BlkGemmPipelineVersion: v4") != -1
|
||||
spec = args[14]
|
||||
block_size = int(args[17])
|
||||
m_per_block = int(args[18])
|
||||
n_per_block = int(args[19])
|
||||
k_per_block = int(args[20])
|
||||
k1 = int(args[21])
|
||||
m_per_xdl = int(args[23])
|
||||
n_per_xdl = int(args[24])
|
||||
m_xdl_per_wave = int(args[25])
|
||||
n_xdl_per_wave = int(args[26])
|
||||
a_scalar_per_vector = int(args[31])
|
||||
b_scalar_per_vector = int(args[38])
|
||||
c_scalar_per_vector = int(args[44])
|
||||
scheduler = "Intrawave"
|
||||
pipeline_version = "v1"
|
||||
direct_load = 0
|
||||
num_groups_to_merge = 0 if split_image else int(args[48])
|
||||
|
||||
double_smem_buffer = pipeline_version == "v4"
|
||||
num_wave_groups = 1
|
||||
scheduler = (
|
||||
"Intrawave" if instance.find("BlkGemmPipelineScheduler") == -1 else args[14]
|
||||
)
|
||||
pipeline_version = (
|
||||
"v1" if instance.find("BlkGemmPipelineVersion") == -1 else args[15]
|
||||
)
|
||||
# Replace pipeline if Direct Load
|
||||
if instance.find("DirectLoad") != -1:
|
||||
if instance.find("BlkGemmPipelineVersion: v1") != -1:
|
||||
if direct_load:
|
||||
if pipeline_version == "v1":
|
||||
pipeline_version = "ASYNC_V1"
|
||||
elif instance.find("BlkGemmPipelineVersion: v4") != -1:
|
||||
elif pipeline_version == "v4":
|
||||
pipeline_version = "ASYNC_V4"
|
||||
else:
|
||||
raise RuntimeError("not supported pipeline for direct load")
|
||||
raise RuntimeError(f"{pipeline_version} not supported pipeline for direct load")
|
||||
else:
|
||||
pipeline_version = f"""V{pipeline_version[-1:]}"""
|
||||
pipeline_version = pipeline_version.upper()
|
||||
|
||||
m_warp = int(m_per_block / (m_per_xdl * m_xdl_per_wave))
|
||||
n_warp = int(n_per_block / (n_per_xdl * n_xdl_per_wave))
|
||||
warp_size = 64
|
||||
k_warp = int(block_size / (warp_size * m_warp * n_warp))
|
||||
dtype = get_dtype(problem_name)
|
||||
# TODO: Make it more flexible
|
||||
# k_per_xdl = f"ck_tile::get_k_warp_tile<{dtype}, {m_per_xdl}>()"
|
||||
if dtype == "float":
|
||||
if m_per_xdl == 32:
|
||||
if instance.find("BlkGemmPipelineVersion") == -1:
|
||||
k_per_xdl = 4
|
||||
else:
|
||||
# Increase for universal gemm
|
||||
k_per_xdl = 8
|
||||
else:
|
||||
k_per_xdl = 8
|
||||
else:
|
||||
if m_per_xdl == 32:
|
||||
k_per_xdl = 16
|
||||
else:
|
||||
k_per_xdl = 32
|
||||
k_per_xdl = min(k_per_xdl, k_per_block)
|
||||
k_per_xdl = max(k1, get_k_mfma(dtype, m_per_xdl, n_per_xdl))
|
||||
|
||||
if split_image:
|
||||
print(f"Skipping instance {instance_id} with split_image since it's not supported yet.")
|
||||
continue
|
||||
if pipeline_version == "V5":
|
||||
print(f"Skipping instance {instance_id} with V5 since it's not supported yet.")
|
||||
continue
|
||||
if pipeline_version == "ASYNC_V4":
|
||||
print(f"Skipping instance {instance_id} with ASYNC_V4 since it's not supported yet.")
|
||||
continue
|
||||
|
||||
conv = ConvInstanceTemplateParams(
|
||||
spec,
|
||||
@@ -250,32 +288,6 @@ def parse_fwd_instances(instances, problem_name):
|
||||
convs.append(conv)
|
||||
return convs
|
||||
|
||||
def parse_instance_string(instance_string):
|
||||
"""Parse instance string, treating Seq(...) as a single parameter."""
|
||||
params = []
|
||||
current_param = ""
|
||||
paren_depth = 0
|
||||
|
||||
for char in instance_string:
|
||||
if char == '(':
|
||||
paren_depth += 1
|
||||
current_param += char
|
||||
elif char == ')':
|
||||
paren_depth -= 1
|
||||
current_param += char
|
||||
elif char == ',' and paren_depth == 0:
|
||||
# Only split on comma if we're not inside parentheses
|
||||
params.append(current_param.strip())
|
||||
current_param = ""
|
||||
else:
|
||||
current_param += char
|
||||
|
||||
# Add the last parameter
|
||||
if current_param.strip():
|
||||
params.append(current_param.strip())
|
||||
|
||||
return params
|
||||
|
||||
def parse_bwd_weight_instances(instances, problem_name):
|
||||
convs = []
|
||||
|
||||
|
||||
Reference in New Issue
Block a user