From a708b177fcd8567282b630df262be335c5431d6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= Date: Fri, 17 Oct 2025 06:24:11 +0000 Subject: [PATCH] Add double smem buffer instances. --- ...grouped_conv_bwd_weight_bf16_instances.hpp | 18 ++++++++- ...grouped_conv_bwd_weight_fp16_instances.hpp | 39 +++++++++++++++---- 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_bwd_weight_bf16_instances.hpp b/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_bwd_weight_bf16_instances.hpp index 189c3a0a64..770acb1e47 100644 --- a/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_bwd_weight_bf16_instances.hpp +++ b/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_bwd_weight_bf16_instances.hpp @@ -33,7 +33,23 @@ using tile_grouped_conv_bwd_weight_bf16_instances = std::tuple< GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker + GroupedConvolutionBackwardWeightInvoker, + + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker // clang-format on >; diff --git a/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_bwd_weight_fp16_instances.hpp b/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_bwd_weight_fp16_instances.hpp index 8dfeec6a4d..cb9e432bed 100644 --- a/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_bwd_weight_fp16_instances.hpp +++ b/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_bwd_weight_fp16_instances.hpp @@ -19,14 +19,37 @@ using tile_grouped_conv_bwd_weight_f16_instances = std::tuple< //#####################################| Num| InLayout| WeiLayout| OutLayout| InData| WeiData| OutData| In| Wei| Out| K-block| M-tile| N-tile | K-tile | M-warp| N-warp| K-warp| M-warp| N-warp| K-warp| Vector| Vector| Vector| Double| GEMM| //#####################################| Dim| | | | Type| Type| Type| Elementwise| Elementwise| Elementwise| per| | | | | | | tile| tile| tile| size| size| size| smem| pipeline| //#####################################| Spatial| | | | | | | Operation| Operation| Operation| CU| | | | | | | size| size| size| A| B| C| buffer| version| - GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker, - // GroupedConvolutionBackwardWeightInvoker, - // GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker // clang-format on >;