diff --git a/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_bwd_weight_bf16_instances.hpp b/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_bwd_weight_bf16_instances.hpp index 4d1a032d4e..a2d9fc358e 100644 --- a/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_bwd_weight_bf16_instances.hpp +++ b/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_bwd_weight_bf16_instances.hpp @@ -32,17 +32,15 @@ using tile_grouped_conv_bwd_weight_bf16_instances = std::tuple< //#####################################| Num| InLayout| WeiLayout| OutLayout| InData| WeiData| OutData| In| Wei| Out| K-block| M-tile| N-tile | K-tile | M-warp| N-warp| K-warp| M-warp| N-warp| K-warp| Vector| Vector| Vector| Double| GEMM| //#####################################| Dim| | | | Type| Type| Type| Elementwise| Elementwise| Elementwise| per| | | | | | | tile| tile| tile| size| size| size| smem| pipeline| //#####################################| Spatial| | | | | | | Operation| Operation| Operation| CU| | | | | | | size| size| size| A| B| C| buffer| version| +#if defined(__gfx950__) GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, @@ -52,17 +50,37 @@ using tile_grouped_conv_bwd_weight_bf16_instances = std::tuple< GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker + GroupedConvolutionBackwardWeightInvoker, +#endif + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker // clang-format on >; diff --git a/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_bwd_weight_fp16_instances.hpp b/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_bwd_weight_fp16_instances.hpp index 34ab68ae05..29fa86120f 100644 --- a/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_bwd_weight_fp16_instances.hpp +++ b/library/include/ck_tile/library/tensor_operation_instance/gpu/tile_grouped_conv_bwd_weight_fp16_instances.hpp @@ -32,17 +32,15 @@ using tile_grouped_conv_bwd_weight_f16_instances = std::tuple< //#####################################| Num| InLayout| WeiLayout| OutLayout| InData| WeiData| OutData| In| Wei| Out| K-block| M-tile| N-tile | K-tile | M-warp| N-warp| K-warp| M-warp| N-warp| K-warp| Vector| Vector| Vector| Double| GEMM| //#####################################| Dim| | | | Type| Type| Type| Elementwise| Elementwise| Elementwise| per| | | | | | | tile| tile| tile| size| size| size| smem| pipeline| //#####################################| Spatial| | | | | | | Operation| Operation| Operation| CU| | | | | | | size| size| size| A| B| C| buffer| version| +#if defined(__gfx950__) GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, @@ -52,17 +50,37 @@ using tile_grouped_conv_bwd_weight_f16_instances = std::tuple< GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, GroupedConvolutionBackwardWeightInvoker, - GroupedConvolutionBackwardWeightInvoker + GroupedConvolutionBackwardWeightInvoker, +#endif + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker, + GroupedConvolutionBackwardWeightInvoker // clang-format on >; diff --git a/profiler/ck_tile/src/tile_profile_grouped_conv_bwd_weight.cpp b/profiler/ck_tile/src/tile_profile_grouped_conv_bwd_weight.cpp index d29df87ea6..6e56ece96b 100644 --- a/profiler/ck_tile/src/tile_profile_grouped_conv_bwd_weight.cpp +++ b/profiler/ck_tile/src/tile_profile_grouped_conv_bwd_weight.cpp @@ -115,9 +115,6 @@ int tile_profile_grouped_conv_bwd_weight(int argc, char* argv[]) using BF16 = ck_tile::bfloat16_t; using F8 = ck_tile::fp8_t; using BF8 = ck_tile::bf8_t; -#if defined(__gfx942__) - using TF32 = ck::tf32_t; -#endif using NHWGC = ck_tile::tensor_layout::convolution::NHWGC; using NDHWGC = ck_tile::tensor_layout::convolution::NDHWGC; @@ -186,12 +183,6 @@ int tile_profile_grouped_conv_bwd_weight(int argc, char* argv[]) { return profile(I2, NHWGC{}, GKYXC{}, NHWGK{}, BF16{}, BF16{}, BF16{}, BF16{}, BF16{}); } - else if(data_type == ConvDataType::F32_F32_F32_TF32) - { -#if defined(__gfx942__) - return profile(I2, NHWGC{}, GKYXC{}, NHWGK{}, F32{}, F32{}, F32{}, TF32{}, TF32{}); -#endif - } } if(num_dim_spatial == 3 && layout == ConvLayout::NHWGC_GKYXC_NHWGK) @@ -223,12 +214,6 @@ int tile_profile_grouped_conv_bwd_weight(int argc, char* argv[]) return profile( I3, NDHWGC{}, GKZYXC{}, NDHWGK{}, int8_t{}, int8_t{}, int8_t{}, int8_t{}, int8_t{}); } - else if(data_type == ConvDataType::F32_F32_F32_TF32) - { -#if defined(__gfx942__) - return profile(I3, NDHWGC{}, GKZYXC{}, NDHWGK{}, F32{}, F32{}, F32{}, TF32{}, TF32{}); -#endif - } } std::cout << "this data_type & layout is not implemented" << std::endl;