diff --git a/example/ck_tile/20_grouped_convolution/grouped_convolution_backward_data.cpp b/example/ck_tile/20_grouped_convolution/grouped_convolution_backward_data.cpp index 4f9362beb2..fa914a7119 100644 --- a/example/ck_tile/20_grouped_convolution/grouped_convolution_backward_data.cpp +++ b/example/ck_tile/20_grouped_convolution/grouped_convolution_backward_data.cpp @@ -11,199 +11,14 @@ #include "ck_tile/host.hpp" #include "grouped_convolution_utils.hpp" - -template , - typename DsLayout = ck_tile::tuple<>, - typename CDEElementWise = ck_tile::element_wise::PassThrough> -float grouped_conv_bwd_data(const ck_tile::GroupedConvBwdDataHostArgs& args, - const ck_tile::stream_config& s) -{ - constexpr int kBlockPerCu = 1; - - constexpr ck_tile::index_t M_Tile = 64; - constexpr ck_tile::index_t N_Tile = 64; - constexpr ck_tile::index_t K_Tile = 32; - - constexpr ck_tile::index_t M_Warp = 2; - constexpr ck_tile::index_t N_Warp = 2; - constexpr ck_tile::index_t K_Warp = 1; - - constexpr ck_tile::index_t M_Warp_Tile = GemmWarpConfig::M_Warp_Tile; - constexpr ck_tile::index_t N_Warp_Tile = GemmWarpConfig::N_Warp_Tile; - constexpr ck_tile::index_t K_Warp_Tile = GemmWarpConfig::K_Warp_Tile; - - constexpr ck_tile::index_t VectorSizeA = 1; - constexpr ck_tile::index_t VectorSizeB = 1; - constexpr ck_tile::index_t VectorSizeC = 8; - - // Implicit GEMM Traits - using CodegenShape = - ck_tile::TileGemmShape, - ck_tile::sequence, - ck_tile::sequence>; - - constexpr auto ConvSpec = ck_tile::ConvolutionSpecialization::Default; - using TilePartitioner = ck_tile::GemmTile1DPartitioner; - using GroupedConvTraitsType = ck_tile::GroupedConvTraits; - using CodegenPipelineProblem = ck_tile::GemmPipelineProblem< - InDataType, - WeiDataType, - AccDataType, - CodegenShape, - typename GroupedConvTraitsType::GroupedConvImplicitGemmTraitsBwdData, - ck_tile::element_wise::PassThrough, - ck_tile::element_wise::PassThrough, - InDataType, - true, - GroupedConvTraitsType::VectorSizeA, - GroupedConvTraitsType::VectorSizeB>; - using CodegenPipeline = ck_tile::GemmPipelineAGmemBGmemCRegV1; - - const auto Run = [&](const auto memory_operation_) { - constexpr auto memory_operation = memory_operation_.value; - - using ConvEpilogue = ck_tile::CShuffleEpilogue< - ck_tile::CShuffleEpilogueProblem>; - - using Kernel = ck_tile::GroupedConvolutionBackwardDataKernel; - auto kargs = Kernel::MakeKernelArgs(args); - - const dim3 grids = Kernel::GridSize(args); - const dim3 blocks = Kernel::BlockSize(); - - if(!Kernel::IsSupportedArgument(kargs)) - { - throw std::runtime_error("Wrong! Arguments not supported! Skipping conv!\n"); - } - - if(s.log_level_ > 0) - { - std::cout << "Launching kernel with args: " << Kernel::GetName() << '\n' - << "shape: " << CodegenShape::GetName() << '\n' - << "problem: " << CodegenPipelineProblem::GetName() << '\n' - << "pipeline: " << CodegenPipeline::GetName() << '\n' - << "grid: {" << grids.x << ", " << grids.y << ", " << grids.z << "}" - << ", blocks: {" << blocks.x << ", " << blocks.y << ", " << blocks.z << "}" - << '\n' - << "Vector size A: " << CodegenPipeline::GetVectorSizeA() - << ", Vector size B: " << CodegenPipeline::GetVectorSizeB() - << ", Vector size C: " << ConvEpilogue::GetVectorSizeC() << std::endl; - } - - float ave_time = ck_tile::launch_kernel( - s, ck_tile::make_kernel(Kernel{}, grids, blocks, 0, kargs)); - - return ave_time; - }; - - if(args.k_batch == 1) - { - return Run(ck_tile::integral_constant{}); - } - else - { - return Run(ck_tile::integral_constant{}); - } -} - +#include "grouped_convolution_backward_data_invoker.hpp" #include "run_grouped_convolution_bwd_data_example.inc" -template -int run_grouped_conv_bwd_data_example_prec_type( - std::string in_layout, std::string wei_layout, std::string out_layout, int argc, char* argv[]) -{ - using NWGC = ck_tile::tensor_layout::convolution::NWGC; - using NHWGC = ck_tile::tensor_layout::convolution::NHWGC; - using NDHWGC = ck_tile::tensor_layout::convolution::NDHWGC; - - using GKXC = ck_tile::tensor_layout::convolution::GKXC; - using GKYXC = ck_tile::tensor_layout::convolution::GKYXC; - using GKZYXC = ck_tile::tensor_layout::convolution::GKZYXC; - - using NWGK = ck_tile::tensor_layout::convolution::NWGK; - using NHWGK = ck_tile::tensor_layout::convolution::NHWGK; - using NDHWGK = ck_tile::tensor_layout::convolution::NDHWGK; - - if(in_layout == "NWGC" && wei_layout == "GKXC" && out_layout == "NWGK") - { - return run_grouped_conv_bwd_data_example_with_layouts{}, - GemmWarpConfig, - InPrecType, - WeiPrecType, - OutPrecType>( - argc, argv, NWGC{}, GKXC{}, NWGK{}); - } - else if(in_layout == "NHWGC" && wei_layout == "GKYXC" && out_layout == "NHWGK") - { - return run_grouped_conv_bwd_data_example_with_layouts{}, - GemmWarpConfig, - InPrecType, - WeiPrecType, - OutPrecType>( - argc, argv, NHWGC{}, GKYXC{}, NHWGK{}); - } - else if(in_layout == "NDHWGC" && wei_layout == "GKZYXC" && out_layout == "NDHWGK") - { - return run_grouped_conv_bwd_data_example_with_layouts{}, - GemmWarpConfig, - InPrecType, - WeiPrecType, - OutPrecType>( - argc, argv, NDHWGC{}, GKZYXC{}, NDHWGK{}); - } - else - { - throw std::runtime_error("Unsupported memory layout!"); - } -} - template int run_grouped_conv_bwd_data_example(int argc, char* argv[]) { + using Invoker = GroupedConvolutionBackwardDataInvoker; + auto [result, arg_parser] = create_args(argc, argv); if(!result) return -1; @@ -215,12 +30,16 @@ int run_grouped_conv_bwd_data_example(int argc, char* argv[]) if(data_type == "fp16") { - return run_grouped_conv_bwd_data_example_prec_type( + return run_grouped_conv_bwd_data_example_prec_type( in_layout, wei_layout, out_layout, argc, argv); } else if(data_type == "bf16") { - return run_grouped_conv_bwd_data_example_prec_type( + return run_grouped_conv_bwd_data_example_prec_type( in_layout, wei_layout, out_layout, argc, argv); } else diff --git a/example/ck_tile/20_grouped_convolution/grouped_convolution_backward_data_invoker.hpp b/example/ck_tile/20_grouped_convolution/grouped_convolution_backward_data_invoker.hpp new file mode 100644 index 0000000000..1b3d45427d --- /dev/null +++ b/example/ck_tile/20_grouped_convolution/grouped_convolution_backward_data_invoker.hpp @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. +#pragma once + +#include "grouped_convolution_utils.hpp" + +struct GroupedConvolutionBackwardDataInvoker +{ + + template , + typename DsLayout = ck_tile::tuple<>, + typename CDEElementWise = ck_tile::element_wise::PassThrough> + static float grouped_conv_bwd_data(const ck_tile::GroupedConvBwdDataHostArgs& args, + const ck_tile::stream_config& s) + { + constexpr int kBlockPerCu = 1; + + constexpr ck_tile::index_t M_Tile = 64; + constexpr ck_tile::index_t N_Tile = 64; + constexpr ck_tile::index_t K_Tile = 32; + + constexpr ck_tile::index_t M_Warp = 2; + constexpr ck_tile::index_t N_Warp = 2; + constexpr ck_tile::index_t K_Warp = 1; + + constexpr ck_tile::index_t M_Warp_Tile = GemmWarpConfig::M_Warp_Tile; + constexpr ck_tile::index_t N_Warp_Tile = GemmWarpConfig::N_Warp_Tile; + constexpr ck_tile::index_t K_Warp_Tile = GemmWarpConfig::K_Warp_Tile; + + constexpr ck_tile::index_t VectorSizeA = 1; + constexpr ck_tile::index_t VectorSizeB = 1; + constexpr ck_tile::index_t VectorSizeC = 8; + + // Implicit GEMM Traits + using CodegenShape = + ck_tile::TileGemmShape, + ck_tile::sequence, + ck_tile::sequence>; + + constexpr auto ConvSpec = ck_tile::ConvolutionSpecialization::Default; + using TilePartitioner = ck_tile::GemmTile1DPartitioner; + using GroupedConvTraitsType = ck_tile::GroupedConvTraits; + using CodegenPipelineProblem = ck_tile::GemmPipelineProblem< + InDataType, + WeiDataType, + AccDataType, + CodegenShape, + typename GroupedConvTraitsType::GroupedConvImplicitGemmTraitsBwdData, + ck_tile::element_wise::PassThrough, + ck_tile::element_wise::PassThrough, + InDataType, + true, + GroupedConvTraitsType::VectorSizeA, + GroupedConvTraitsType::VectorSizeB>; + using CodegenPipeline = ck_tile::GemmPipelineAGmemBGmemCRegV1; + + const auto Run = [&](const auto memory_operation_) { + constexpr auto memory_operation = memory_operation_.value; + + using ConvEpilogue = ck_tile::CShuffleEpilogue>; + + using Kernel = ck_tile::GroupedConvolutionBackwardDataKernel; + auto kargs = Kernel::MakeKernelArgs(args); + + const dim3 grids = Kernel::GridSize(args); + const dim3 blocks = Kernel::BlockSize(); + + if(!Kernel::IsSupportedArgument(kargs)) + { + throw std::runtime_error("Wrong! Arguments not supported! Skipping conv!\n"); + } + + if(s.log_level_ > 0) + { + std::cout << "Launching kernel with args: " << Kernel::GetName() << '\n' + << "shape: " << CodegenShape::GetName() << '\n' + << "problem: " << CodegenPipelineProblem::GetName() << '\n' + << "pipeline: " << CodegenPipeline::GetName() << '\n' + << "grid: {" << grids.x << ", " << grids.y << ", " << grids.z << "}" + << ", blocks: {" << blocks.x << ", " << blocks.y << ", " << blocks.z + << "}" << '\n' + << "Vector size A: " << CodegenPipeline::GetVectorSizeA() + << ", Vector size B: " << CodegenPipeline::GetVectorSizeB() + << ", Vector size C: " << ConvEpilogue::GetVectorSizeC() << std::endl; + } + + float ave_time = ck_tile::launch_kernel( + s, ck_tile::make_kernel(Kernel{}, grids, blocks, 0, kargs)); + + return ave_time; + }; + + if(args.k_batch == 1) + { + return Run(ck_tile::integral_constant{}); + } + else + { + return Run(ck_tile::integral_constant{}); + } + } +}; diff --git a/example/ck_tile/20_grouped_convolution/grouped_convolution_forward.cpp b/example/ck_tile/20_grouped_convolution/grouped_convolution_forward.cpp index cebfa90579..4cddbae3ab 100644 --- a/example/ck_tile/20_grouped_convolution/grouped_convolution_forward.cpp +++ b/example/ck_tile/20_grouped_convolution/grouped_convolution_forward.cpp @@ -11,190 +11,14 @@ #include "ck_tile/host.hpp" #include "grouped_convolution_utils.hpp" - -template , - typename DsLayout = ck_tile::tuple<>, - typename CDEElementWise = ck_tile::element_wise::PassThrough> -float grouped_conv_fwd(const ck_tile::GroupedConvFwdHostArgs& args, const ck_tile::stream_config& s) -{ - constexpr int kBlockPerCu = 1; - - constexpr ck_tile::index_t M_Tile = 64; - constexpr ck_tile::index_t N_Tile = 64; - constexpr ck_tile::index_t K_Tile = 64; - - constexpr ck_tile::index_t M_Warp = 2; - constexpr ck_tile::index_t N_Warp = 2; - constexpr ck_tile::index_t K_Warp = 1; - - constexpr ck_tile::index_t M_Warp_Tile = GemmWarpConfig::M_Warp_Tile; - constexpr ck_tile::index_t N_Warp_Tile = GemmWarpConfig::N_Warp_Tile; - constexpr ck_tile::index_t K_Warp_Tile = GemmWarpConfig::K_Warp_Tile; - - constexpr ck_tile::index_t VectorSizeA = 8; - constexpr ck_tile::index_t VectorSizeB = 8; - constexpr ck_tile::index_t VectorSizeC = 8; - - // Implicit GEMM Traits - using CodegenShape = - ck_tile::TileGemmShape, - ck_tile::sequence, - ck_tile::sequence>; - - constexpr auto ConvSpec = ck_tile::ConvolutionSpecialization::Default; - using TilePartitioner = ck_tile::GemmTile1DPartitioner; - using GroupedConvTraitsType = ck_tile::GroupedConvTraits; - using CodegenPipelineProblem = ck_tile::GemmPipelineProblem< - InDataType, - WeiDataType, - AccDataType, - CodegenShape, - typename GroupedConvTraitsType::GroupedConvImplicitGemmTraitsFwd, - ck_tile::element_wise::PassThrough, - ck_tile::element_wise::PassThrough, - InDataType, - true, - GroupedConvTraitsType::VectorSizeA, - GroupedConvTraitsType::VectorSizeB>; - using CodegenPipeline = ck_tile::GemmPipelineAGmemBGmemCRegV1; - - const auto Run = [&](const auto memory_operation_) { - constexpr auto memory_operation = memory_operation_.value; - - using ConvEpilogue = ck_tile::CShuffleEpilogue< - ck_tile::CShuffleEpilogueProblem>; - - using Kernel = ck_tile::GroupedConvolutionForwardKernel; - auto kargs = Kernel::MakeKernelArgs(args); - - const dim3 grids = Kernel::GridSize(kargs); - const dim3 blocks = Kernel::BlockSize(); - - if(!Kernel::IsSupportedArgument(kargs)) - { - throw std::runtime_error("Wrong! Arguments not supported! Skipping conv!\n"); - } - - if(s.log_level_ > 0) - { - std::cout << "Launching kernel with args: " << Kernel::GetName() << '\n' - << "shape: " << CodegenShape::GetName() << '\n' - << "problem: " << CodegenPipelineProblem::GetName() << '\n' - << "pipeline: " << CodegenPipeline::GetName() << '\n' - << "grid: {" << grids.x << ", " << grids.y << ", " << grids.z << "}" - << ", blocks: {" << blocks.x << ", " << blocks.y << ", " << blocks.z << "}" - << '\n' - << "Vector size A: " << CodegenPipeline::GetVectorSizeA() - << ", Vector size B: " << CodegenPipeline::GetVectorSizeB() - << ", Vector size C: " << ConvEpilogue::GetVectorSizeC() << std::endl; - } - - float ave_time = ck_tile::launch_kernel( - s, ck_tile::make_kernel(Kernel{}, grids, blocks, 0, kargs)); - - return ave_time; - }; - - return Run(ck_tile::integral_constant{}); -} - +#include "grouped_convolution_forward_invoker.hpp" #include "run_grouped_convolution_fwd_example.inc" -template -int run_grouped_conv_fwd_example_prec_type( - std::string in_layout, std::string wei_layout, std::string out_layout, int argc, char* argv[]) -{ - using NWGC = ck_tile::tensor_layout::convolution::NWGC; - using NHWGC = ck_tile::tensor_layout::convolution::NHWGC; - using NDHWGC = ck_tile::tensor_layout::convolution::NDHWGC; - - using GKXC = ck_tile::tensor_layout::convolution::GKXC; - using GKYXC = ck_tile::tensor_layout::convolution::GKYXC; - using GKZYXC = ck_tile::tensor_layout::convolution::GKZYXC; - - using NWGK = ck_tile::tensor_layout::convolution::NWGK; - using NHWGK = ck_tile::tensor_layout::convolution::NHWGK; - using NDHWGK = ck_tile::tensor_layout::convolution::NDHWGK; - - if(in_layout == "NWGC" && wei_layout == "GKXC" && out_layout == "NWGK") - { - return run_grouped_conv_fwd_example_with_layouts{}, - GemmWarpConfig, - InPrecType, - WeiPrecType, - OutPrecType>( - argc, argv, NWGC{}, GKXC{}, NWGK{}); - } - else if(in_layout == "NHWGC" && wei_layout == "GKYXC" && out_layout == "NHWGK") - { - return run_grouped_conv_fwd_example_with_layouts{}, - GemmWarpConfig, - InPrecType, - WeiPrecType, - OutPrecType>( - argc, argv, NHWGC{}, GKYXC{}, NHWGK{}); - } - else if(in_layout == "NDHWGC" && wei_layout == "GKZYXC" && out_layout == "GKZYXC") - { - return run_grouped_conv_fwd_example_with_layouts{}, - GemmWarpConfig, - InPrecType, - WeiPrecType, - OutPrecType>( - argc, argv, NDHWGC{}, GKZYXC{}, NDHWGK{}); - } - else - { - throw std::runtime_error("Unsupported memory layout!"); - } -} - template int run_grouped_conv_fwd_example(int argc, char* argv[]) { + using Invoker = GroupedConvolutionForwardInvoker; + auto [result, arg_parser] = create_args(argc, argv); if(!result) return -1; @@ -206,12 +30,12 @@ int run_grouped_conv_fwd_example(int argc, char* argv[]) if(data_type == "fp16") { - return run_grouped_conv_fwd_example_prec_type( + return run_grouped_conv_fwd_example_prec_type( in_layout, wei_layout, out_layout, argc, argv); } else if(data_type == "bf16") { - return run_grouped_conv_fwd_example_prec_type( + return run_grouped_conv_fwd_example_prec_type( in_layout, wei_layout, out_layout, argc, argv); } else diff --git a/example/ck_tile/20_grouped_convolution/grouped_convolution_forward_invoker.hpp b/example/ck_tile/20_grouped_convolution/grouped_convolution_forward_invoker.hpp new file mode 100644 index 0000000000..0b9879d247 --- /dev/null +++ b/example/ck_tile/20_grouped_convolution/grouped_convolution_forward_invoker.hpp @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. +#pragma once + +#include "grouped_convolution_utils.hpp" + +struct GroupedConvolutionForwardInvoker +{ + template , + typename DsLayout = ck_tile::tuple<>, + typename CDEElementWise = ck_tile::element_wise::PassThrough> + static float grouped_conv_fwd(const ck_tile::GroupedConvFwdHostArgs& args, + const ck_tile::stream_config& s) + { + constexpr int kBlockPerCu = 1; + + constexpr ck_tile::index_t M_Tile = 64; + constexpr ck_tile::index_t N_Tile = 64; + constexpr ck_tile::index_t K_Tile = 64; + + constexpr ck_tile::index_t M_Warp = 2; + constexpr ck_tile::index_t N_Warp = 2; + constexpr ck_tile::index_t K_Warp = 1; + + constexpr ck_tile::index_t M_Warp_Tile = GemmWarpConfig::M_Warp_Tile; + constexpr ck_tile::index_t N_Warp_Tile = GemmWarpConfig::N_Warp_Tile; + constexpr ck_tile::index_t K_Warp_Tile = GemmWarpConfig::K_Warp_Tile; + + constexpr ck_tile::index_t VectorSizeA = 8; + constexpr ck_tile::index_t VectorSizeB = 8; + constexpr ck_tile::index_t VectorSizeC = 8; + + // Implicit GEMM Traits + using CodegenShape = + ck_tile::TileGemmShape, + ck_tile::sequence, + ck_tile::sequence>; + + constexpr auto ConvSpec = ck_tile::ConvolutionSpecialization::Default; + using TilePartitioner = ck_tile::GemmTile1DPartitioner; + using GroupedConvTraitsType = ck_tile::GroupedConvTraits; + using CodegenPipelineProblem = ck_tile::GemmPipelineProblem< + InDataType, + WeiDataType, + AccDataType, + CodegenShape, + typename GroupedConvTraitsType::GroupedConvImplicitGemmTraitsFwd, + ck_tile::element_wise::PassThrough, + ck_tile::element_wise::PassThrough, + InDataType, + true, + GroupedConvTraitsType::VectorSizeA, + GroupedConvTraitsType::VectorSizeB>; + using CodegenPipeline = ck_tile::GemmPipelineAGmemBGmemCRegV1; + + const auto Run = [&](const auto memory_operation_) { + constexpr auto memory_operation = memory_operation_.value; + + using ConvEpilogue = ck_tile::CShuffleEpilogue>; + + using Kernel = ck_tile::GroupedConvolutionForwardKernel; + auto kargs = Kernel::MakeKernelArgs(args); + + const dim3 grids = Kernel::GridSize(kargs); + const dim3 blocks = Kernel::BlockSize(); + + if(!Kernel::IsSupportedArgument(kargs)) + { + throw std::runtime_error("Wrong! Arguments not supported! Skipping conv!\n"); + } + + if(s.log_level_ > 0) + { + std::cout << "Launching kernel with args: " << Kernel::GetName() << '\n' + << "shape: " << CodegenShape::GetName() << '\n' + << "problem: " << CodegenPipelineProblem::GetName() << '\n' + << "pipeline: " << CodegenPipeline::GetName() << '\n' + << "grid: {" << grids.x << ", " << grids.y << ", " << grids.z << "}" + << ", blocks: {" << blocks.x << ", " << blocks.y << ", " << blocks.z + << "}" << '\n' + << "Vector size A: " << CodegenPipeline::GetVectorSizeA() + << ", Vector size B: " << CodegenPipeline::GetVectorSizeB() + << ", Vector size C: " << ConvEpilogue::GetVectorSizeC() << std::endl; + } + + float ave_time = ck_tile::launch_kernel( + s, ck_tile::make_kernel(Kernel{}, grids, blocks, 0, kargs)); + + return ave_time; + }; + + return Run(ck_tile::integral_constant{}); + } +}; diff --git a/example/ck_tile/20_grouped_convolution/run_grouped_convolution_bwd_data_example.inc b/example/ck_tile/20_grouped_convolution/run_grouped_convolution_bwd_data_example.inc index 8519daaac2..3d7635bf4f 100644 --- a/example/ck_tile/20_grouped_convolution/run_grouped_convolution_bwd_data_example.inc +++ b/example/ck_tile/20_grouped_convolution/run_grouped_convolution_bwd_data_example.inc @@ -4,6 +4,7 @@ template ( + float ave_time = Invoker::template grouped_conv_bwd_data( args, ck_tile::stream_config{nullptr, true, 1, n_warmup, n_repeat}); std::size_t flop = args.GetFlops(); @@ -39,6 +40,7 @@ float invoke_grouped_conv_bwd_data(ck_tile::GroupedConvBwdDataHostArgs& args, template +int run_grouped_conv_bwd_data_example_prec_type( + std::string in_layout, std::string wei_layout, std::string out_layout, int argc, char* argv[]) +{ + using NWGC = ck_tile::tensor_layout::convolution::NWGC; + using NHWGC = ck_tile::tensor_layout::convolution::NHWGC; + using NDHWGC = ck_tile::tensor_layout::convolution::NDHWGC; + + using GKXC = ck_tile::tensor_layout::convolution::GKXC; + using GKYXC = ck_tile::tensor_layout::convolution::GKYXC; + using GKZYXC = ck_tile::tensor_layout::convolution::GKZYXC; + + using NWGK = ck_tile::tensor_layout::convolution::NWGK; + using NHWGK = ck_tile::tensor_layout::convolution::NHWGK; + using NDHWGK = ck_tile::tensor_layout::convolution::NDHWGK; + + if(in_layout == "NWGC" && wei_layout == "GKXC" && out_layout == "NWGK") + { + return run_grouped_conv_bwd_data_example_with_layouts{}, + GemmWarpConfig, + Invoker, + InPrecType, + WeiPrecType, + OutPrecType>( + argc, argv, NWGC{}, GKXC{}, NWGK{}); + } + else if(in_layout == "NHWGC" && wei_layout == "GKYXC" && out_layout == "NHWGK") + { + return run_grouped_conv_bwd_data_example_with_layouts{}, + GemmWarpConfig, + Invoker, + InPrecType, + WeiPrecType, + OutPrecType>( + argc, argv, NHWGC{}, GKYXC{}, NHWGK{}); + } + else if(in_layout == "NDHWGC" && wei_layout == "GKZYXC" && out_layout == "NDHWGK") + { + return run_grouped_conv_bwd_data_example_with_layouts{}, + GemmWarpConfig, + Invoker, + InPrecType, + WeiPrecType, + OutPrecType>( + argc, argv, NDHWGC{}, GKZYXC{}, NDHWGK{}); + } + else + { + throw std::runtime_error("Unsupported memory layout!"); + } +} diff --git a/example/ck_tile/20_grouped_convolution/run_grouped_convolution_fwd_example.inc b/example/ck_tile/20_grouped_convolution/run_grouped_convolution_fwd_example.inc index c5ae92a0da..beb6005e19 100644 --- a/example/ck_tile/20_grouped_convolution/run_grouped_convolution_fwd_example.inc +++ b/example/ck_tile/20_grouped_convolution/run_grouped_convolution_fwd_example.inc @@ -4,6 +4,7 @@ template ( + float ave_time = Invoker::template grouped_conv_fwd( args, ck_tile::stream_config{nullptr, true, 1, n_warmup, n_repeat}); std::size_t flop = args.GetFlops(); @@ -39,6 +40,7 @@ float invoke_grouped_conv_fwd(const ck_tile::GroupedConvFwdHostArgs& args, template +int run_grouped_conv_fwd_example_prec_type( + std::string in_layout, std::string wei_layout, std::string out_layout, int argc, char* argv[]) +{ + using NWGC = ck_tile::tensor_layout::convolution::NWGC; + using NHWGC = ck_tile::tensor_layout::convolution::NHWGC; + using NDHWGC = ck_tile::tensor_layout::convolution::NDHWGC; + + using GKXC = ck_tile::tensor_layout::convolution::GKXC; + using GKYXC = ck_tile::tensor_layout::convolution::GKYXC; + using GKZYXC = ck_tile::tensor_layout::convolution::GKZYXC; + + using NWGK = ck_tile::tensor_layout::convolution::NWGK; + using NHWGK = ck_tile::tensor_layout::convolution::NHWGK; + using NDHWGK = ck_tile::tensor_layout::convolution::NDHWGK; + + if(in_layout == "NWGC" && wei_layout == "GKXC" && out_layout == "NWGK") + { + return run_grouped_conv_fwd_example_with_layouts{}, + GemmWarpConfig, + Invoker, + InPrecType, + WeiPrecType, + OutPrecType>( + argc, argv, NWGC{}, GKXC{}, NWGK{}); + } + else if(in_layout == "NHWGC" && wei_layout == "GKYXC" && out_layout == "NHWGK") + { + return run_grouped_conv_fwd_example_with_layouts{}, + GemmWarpConfig, + Invoker, + InPrecType, + WeiPrecType, + OutPrecType>( + argc, argv, NHWGC{}, GKYXC{}, NHWGK{}); + } + else if(in_layout == "NDHWGC" && wei_layout == "GKZYXC" && out_layout == "NDHWGK") + { + return run_grouped_conv_fwd_example_with_layouts{}, + GemmWarpConfig, + Invoker, + InPrecType, + WeiPrecType, + OutPrecType>( + argc, argv, NDHWGC{}, GKZYXC{}, NDHWGK{}); + } + else + { + throw std::runtime_error("Unsupported memory layout!"); + } +}