Add more instances.

This commit is contained in:
Ville Pietilä
2025-10-15 14:33:01 +00:00
parent 96a7c26a0b
commit a5b60ed2f2
2 changed files with 132 additions and 7 deletions

View File

@@ -22,14 +22,143 @@ using tile_grouped_conv_bwd_weight_bf16_instances = std::tuple<
//#####################################| Dim| | | | Type| Type| Type| Elementwise| Elementwise| Elementwise| per| | | | | | | tile| tile| tile| size| size| size|
//#####################################| Spatial| | | | | | | Operation| Operation| Operation| CU| | | | | | | size| size| size| A| B| C|
//#####################################| | | | | | | | | | |
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 1, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 1, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 1, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 1, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 1, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 1, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 2, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 2, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 2, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 2, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 4, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 4, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 4, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 4, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 8, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 8, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 8, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 8, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 1, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 1, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 1, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 1, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 2, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 2, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 2, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 2, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 4, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 4, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 4, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 4, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 8, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 8, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 8, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 8, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 1, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 1, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 1, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 1, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 2, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 2, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 2, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 2, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 4, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 4, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 4, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 4, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 8, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 8, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 8, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 8, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 1, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 1, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 1, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 1, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 2, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 2, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 2, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 2, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 4, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 4, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 4, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 4, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 8, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 8, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 8, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 8, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 1, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 1, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 1, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 1, 8>
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 1, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 2, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 2, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 2, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 2, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 4, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 4, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 4, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 4, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 8, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 8, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 8, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 1, 8, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 1, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 1, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 1, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 1, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 2, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 2, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 2, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 2, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 4, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 4, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 4, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 4, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 8, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 8, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 8, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 8, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 1, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 1, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 1, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 1, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 2, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 2, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 2, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 2, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 4, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 4, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 4, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 4, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 8, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 8, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 8, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 4, 8, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 1, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 1, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 1, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 1, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 2, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 2, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 2, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 2, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 4, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 4, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 4, 4>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 4, 8>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 8, 1>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 8, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 8, 4>,
//#####################################| Num| InLayout| WeiLayout| OutLayout| InData| WeiData| OutData| In| Wei| Out| K-block| M-tile| N-tile | K-tile | M-warp| N-warp| K-warp| M-warp| N-warp| K-warp| Vector| Vector| Vector|
//#####################################| Dim| | | | Type| Type| Type| Elementwise| Elementwise| Elementwise| per| | | | | | | tile| tile| tile| size| size| size|
//#####################################| Spatial| | | | | | | Operation| Operation| Operation| CU| | | | | | | size| size| size| A| B| C|
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 4, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 8, 2, 4, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 16, 16, 32, 2, 4, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 8, 128, 64, 2, 2, 1, 4, 64, 16, 2, 4, 2>,
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 128, 8, 64, 2, 2, 1, 64, 4, 16, 2, 4, 2>
// // clang-format on
// clang-format on
>;

View File

@@ -119,16 +119,12 @@ int tile_profile_grouped_conv_bwd_weight(int argc, char* argv[])
using TF32 = ck::tf32_t;
#endif
//using namespace ck::tensor_layout::convolution;
//using NWGC = ck_tile::tensor_layout::convolution::NWGC;
using NHWGC = ck_tile::tensor_layout::convolution::NHWGC;
using NDHWGC = ck_tile::tensor_layout::convolution::NDHWGC;
//using GKXC = ck_tile::tensor_layout::convolution::GKXC;
using GKYXC = ck_tile::tensor_layout::convolution::GKYXC;
using GKZYXC = ck_tile::tensor_layout::convolution::GKZYXC;
//using NWGK = ck_tile::tensor_layout::convolution::NWGK;
using NHWGK = ck_tile::tensor_layout::convolution::NHWGK;
using NDHWGK = ck_tile::tensor_layout::convolution::NDHWGK;