mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-07-01 20:27:42 +00:00
Merge branch 'vpietila/ck-vs-ck-tile-conv-benchmarking' of github.com:ROCm/composable_kernel into vpietila/ck-vs-ck-tile-conv-benchmarking
This commit is contained in:
@@ -32,17 +32,15 @@ using tile_grouped_conv_bwd_weight_bf16_instances = std::tuple<
|
||||
//#####################################| Num| InLayout| WeiLayout| OutLayout| InData| WeiData| OutData| In| Wei| Out| K-block| M-tile| N-tile | K-tile | M-warp| N-warp| K-warp| M-warp| N-warp| K-warp| Vector| Vector| Vector| Double| GEMM|
|
||||
//#####################################| Dim| | | | Type| Type| Type| Elementwise| Elementwise| Elementwise| per| | | | | | | tile| tile| tile| size| size| size| smem| pipeline|
|
||||
//#####################################| Spatial| | | | | | | Operation| Operation| Operation| CU| | | | | | | size| size| size| A| B| C| buffer| version|
|
||||
#if defined(__gfx950__)
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 4, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 8, 2, 4, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 16, 16, 32, 2, 4, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 16, 16, 32, 4, 4, 4, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 16, 16, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 1, 1, 1, 32, 32, 8, 2, 2, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 1, 1, 1, 32, 32, 16, 2, 2, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 1, 1, 1, 16, 16, 32, 2, 2, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 4, 4, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 4, 4, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 4, 4, 4, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
@@ -52,17 +50,37 @@ using tile_grouped_conv_bwd_weight_bf16_instances = std::tuple<
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 8, 2, 4, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 16, 16, 32, 2, 4, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 16, 16, 32, 4, 4, 4, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 16, 16, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 1, 1, 1, 32, 32, 8, 2, 2, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 1, 1, 1, 32, 32, 16, 2, 2, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 1, 1, 1, 16, 16, 32, 2, 2, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 4, 4, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 4, 4, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 4, 4, 4, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 2, 2, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 2, 2, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
#endif
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 1, 1, 1, 32, 32, 16, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 1, 1, 1, 32, 32, 16, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 16, 16, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 4, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 4, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 32, 32, 64, 1, 1, 1, 16, 16, 32, 8, 8, 4, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 32, 32, 64, 1, 1, 1, 16, 16, 32, 8, 8, 4, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 16, 16, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 4, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 4, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 1, 32, 32, 64, 1, 1, 1, 16, 16, 32, 8, 8, 4, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, BF16, BF16, BF16, PassThrough, PassThrough, PassThrough, 2, 32, 32, 64, 1, 1, 1, 16, 16, 32, 8, 8, 4, true, CK_TILE_PIPELINE_COMPUTE_V4>
|
||||
// clang-format on
|
||||
>;
|
||||
|
||||
|
||||
@@ -32,17 +32,15 @@ using tile_grouped_conv_bwd_weight_f16_instances = std::tuple<
|
||||
//#####################################| Num| InLayout| WeiLayout| OutLayout| InData| WeiData| OutData| In| Wei| Out| K-block| M-tile| N-tile | K-tile | M-warp| N-warp| K-warp| M-warp| N-warp| K-warp| Vector| Vector| Vector| Double| GEMM|
|
||||
//#####################################| Dim| | | | Type| Type| Type| Elementwise| Elementwise| Elementwise| per| | | | | | | tile| tile| tile| size| size| size| smem| pipeline|
|
||||
//#####################################| Spatial| | | | | | | Operation| Operation| Operation| CU| | | | | | | size| size| size| A| B| C| buffer| version|
|
||||
#if defined(__gfx950__)
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 2, 4, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 8, 2, 4, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 16, 16, 32, 2, 4, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 16, 16, 32, 4, 4, 4, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 16, 16, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 1, 1, 1, 32, 32, 8, 2, 2, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 1, 1, 1, 32, 32, 16, 2, 2, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 1, 1, 1, 16, 16, 32, 2, 2, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 4, 4, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 4, 4, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 4, 4, 4, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
@@ -52,17 +50,37 @@ using tile_grouped_conv_bwd_weight_f16_instances = std::tuple<
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 8, 2, 4, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 16, 16, 32, 2, 4, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 16, 16, 32, 4, 4, 4, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 16, 16, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 1, 1, 1, 32, 32, 8, 2, 2, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 1, 1, 1, 32, 32, 16, 2, 2, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 1, 1, 1, 16, 16, 32, 2, 2, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 4, 4, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 4, 4, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 4, 4, 4, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 4, 4, 4, true, CK_TILE_PIPELINE_COMPUTE_V4>
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 4, 4, 4, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
#endif
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 32, 32, 16, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 64, 64, 64, 1, 1, 1, 32, 32, 16, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 1, 1, 1, 32, 32, 16, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 2, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 16, 16, 32, 8, 8, 8, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 4, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 4, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 32, 32, 64, 1, 1, 1, 16, 16, 32, 8, 8, 4, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 32, 32, 64, 1, 1, 1, 16, 16, 32, 8, 8, 4, false, CK_TILE_PIPELINE_COMPUTE_V3>,
|
||||
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 2, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 64, 64, 64, 2, 2, 1, 16, 16, 32, 8, 8, 8, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 4, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 16, 16, 64, 1, 1, 1, 16, 16, 32, 8, 8, 4, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 1, 32, 32, 64, 1, 1, 1, 16, 16, 32, 8, 8, 4, true, CK_TILE_PIPELINE_COMPUTE_V4>,
|
||||
GroupedConvolutionBackwardWeightInvoker<NDimSpatial, ALayout, BLayout, ELayout, F16, F16, F16, PassThrough, PassThrough, PassThrough, 2, 32, 32, 64, 1, 1, 1, 16, 16, 32, 8, 8, 4, true, CK_TILE_PIPELINE_COMPUTE_V4>
|
||||
// clang-format on
|
||||
>;
|
||||
|
||||
|
||||
@@ -115,9 +115,6 @@ int tile_profile_grouped_conv_bwd_weight(int argc, char* argv[])
|
||||
using BF16 = ck_tile::bfloat16_t;
|
||||
using F8 = ck_tile::fp8_t;
|
||||
using BF8 = ck_tile::bf8_t;
|
||||
#if defined(__gfx942__)
|
||||
using TF32 = ck::tf32_t;
|
||||
#endif
|
||||
|
||||
using NHWGC = ck_tile::tensor_layout::convolution::NHWGC;
|
||||
using NDHWGC = ck_tile::tensor_layout::convolution::NDHWGC;
|
||||
@@ -186,12 +183,6 @@ int tile_profile_grouped_conv_bwd_weight(int argc, char* argv[])
|
||||
{
|
||||
return profile(I2, NHWGC{}, GKYXC{}, NHWGK{}, BF16{}, BF16{}, BF16{}, BF16{}, BF16{});
|
||||
}
|
||||
else if(data_type == ConvDataType::F32_F32_F32_TF32)
|
||||
{
|
||||
#if defined(__gfx942__)
|
||||
return profile(I2, NHWGC{}, GKYXC{}, NHWGK{}, F32{}, F32{}, F32{}, TF32{}, TF32{});
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
if(num_dim_spatial == 3 && layout == ConvLayout::NHWGC_GKYXC_NHWGK)
|
||||
@@ -223,12 +214,6 @@ int tile_profile_grouped_conv_bwd_weight(int argc, char* argv[])
|
||||
return profile(
|
||||
I3, NDHWGC{}, GKZYXC{}, NDHWGK{}, int8_t{}, int8_t{}, int8_t{}, int8_t{}, int8_t{});
|
||||
}
|
||||
else if(data_type == ConvDataType::F32_F32_F32_TF32)
|
||||
{
|
||||
#if defined(__gfx942__)
|
||||
return profile(I3, NDHWGC{}, GKZYXC{}, NDHWGK{}, F32{}, F32{}, F32{}, TF32{}, TF32{});
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "this data_type & layout is not implemented" << std::endl;
|
||||
|
||||
Reference in New Issue
Block a user