mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-19 20:40:07 +00:00
Add client example of grouped conv2d backward weight (data type: fp16) (#498)
* Remove redundant CMake setting
* Extract common code from files
* Rename folder 'convnd' to 'conv'
* Use std::array<> to accept compile-time kwnown # of arguments
* Fix compilation error of tuning parameter
* In example, use same setting as unit-test
* Remove no-longer used include directive
* Add interface for grouped conv bwd weight
* Add group support for conv bwd weight
* Add grouped conv bwd weight example
* Use group parameter in example
* Rename example folder
* Remove non-grouped version example source files
* Rename device op template
* Add group support to convolution backward weight
* Remove debug messages
* Use smaller group size in example
* Use named variable as loop terminate condition
* Prettify example output message
* Enlarge used grid size
* Allow real grid size exceeds expected grid size
* Rename interface file
* Add client example for grouped conv2d bwd weight
* Fix wrong include directive
* Rename client example folder
[ROCm/composable_kernel commit: 38470e0497]
This commit is contained in:
@@ -20,8 +20,8 @@ set(PROFILER_SOURCE
|
||||
src/profile_conv_fwd_bias_relu.cpp
|
||||
src/profile_conv_fwd_bias_relu_add.cpp
|
||||
src/profile_conv_bwd_data.cpp
|
||||
src/profile_conv_bwd_weight.cpp
|
||||
src/profile_grouped_conv_fwd.cpp
|
||||
src/profile_grouped_conv_bwd_weight.cpp
|
||||
src/profile_reduce.cpp
|
||||
src/profile_groupnorm.cpp
|
||||
src/profile_layernorm.cpp
|
||||
@@ -49,9 +49,9 @@ target_link_libraries(ckProfiler PRIVATE device_grouped_conv3d_fwd_instance)
|
||||
target_link_libraries(ckProfiler PRIVATE device_conv1d_bwd_data_instance)
|
||||
target_link_libraries(ckProfiler PRIVATE device_conv2d_bwd_data_instance)
|
||||
target_link_libraries(ckProfiler PRIVATE device_conv3d_bwd_data_instance)
|
||||
target_link_libraries(ckProfiler PRIVATE device_conv1d_bwd_weight_instance)
|
||||
target_link_libraries(ckProfiler PRIVATE device_conv2d_bwd_weight_instance)
|
||||
target_link_libraries(ckProfiler PRIVATE device_conv3d_bwd_weight_instance)
|
||||
target_link_libraries(ckProfiler PRIVATE device_grouped_conv1d_bwd_weight_instance)
|
||||
target_link_libraries(ckProfiler PRIVATE device_grouped_conv2d_bwd_weight_instance)
|
||||
target_link_libraries(ckProfiler PRIVATE device_grouped_conv3d_bwd_weight_instance)
|
||||
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_instance)
|
||||
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_add_instance)
|
||||
target_link_libraries(ckProfiler PRIVATE device_normalization_instance)
|
||||
|
||||
@@ -3,9 +3,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ck/ck.hpp"
|
||||
#include <algorithm>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <typeinfo>
|
||||
|
||||
#include "ck/ck.hpp"
|
||||
@@ -13,7 +14,7 @@
|
||||
#include "ck/tensor_operation/gpu/device/device_conv_fwd.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
|
||||
|
||||
#include "ck/library/tensor_operation_instance/gpu/convolution_backward_weight.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight.hpp"
|
||||
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
@@ -26,32 +27,6 @@
|
||||
namespace ck {
|
||||
namespace profiler {
|
||||
|
||||
template <typename DataType>
|
||||
void show_data_nhwc_layout(Tensor<DataType>& nhwc)
|
||||
{
|
||||
std::cout << "[";
|
||||
for(int n = 0; n < ck::type_convert<int>(nhwc.mDesc.GetLengths()[0]); n++)
|
||||
{
|
||||
std::cout << "[";
|
||||
for(int hi = 0; hi < ck::type_convert<int>(nhwc.mDesc.GetLengths()[2]); hi++)
|
||||
{
|
||||
std::cout << "[";
|
||||
for(int wi = 0; wi < ck::type_convert<int>(nhwc.mDesc.GetLengths()[3]); wi++)
|
||||
{
|
||||
std::cout << "[";
|
||||
for(int c = 0; c < ck::type_convert<int>(nhwc.mDesc.GetLengths()[1]); c++)
|
||||
{
|
||||
std::cout << static_cast<float>(nhwc(n, c, hi, wi)) << " ";
|
||||
}
|
||||
std::cout << "]";
|
||||
}
|
||||
std::cout << "]";
|
||||
}
|
||||
std::cout << "]";
|
||||
}
|
||||
std::cout << "]";
|
||||
}
|
||||
|
||||
template <ck::index_t NDimSpatial,
|
||||
typename InLayout,
|
||||
typename WeiLayout,
|
||||
@@ -59,12 +34,12 @@ template <ck::index_t NDimSpatial,
|
||||
typename InDataType,
|
||||
typename WeiDataType,
|
||||
typename OutDataType>
|
||||
bool profile_conv_bwd_weight_impl(int do_verification,
|
||||
int init_method,
|
||||
bool do_log,
|
||||
bool time_kernel,
|
||||
const ck::utils::conv::ConvParam& conv_param,
|
||||
ck::index_t split_k)
|
||||
bool profile_grouped_conv_bwd_weight_impl(int do_verification,
|
||||
int init_method,
|
||||
bool do_log,
|
||||
bool time_kernel,
|
||||
const ck::utils::conv::ConvParam& conv_param,
|
||||
ck::index_t split_k)
|
||||
{
|
||||
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
@@ -114,16 +89,14 @@ bool profile_conv_bwd_weight_impl(int do_verification,
|
||||
|
||||
if(do_verification)
|
||||
{
|
||||
auto ref_conv = ck::tensor_operation::host::ReferenceConvBwdWeight<NDimSpatial,
|
||||
auto ref_conv = ck::tensor_operation::host::ReferenceConvBwdWeight<NDimSpatial,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp>{};
|
||||
|
||||
auto ref_invoker = ref_conv.MakeInvoker();
|
||||
|
||||
auto ref_invoker = ref_conv.MakeInvoker();
|
||||
auto ref_argument = ref_conv.MakeArgument(input,
|
||||
weight_host_result,
|
||||
output,
|
||||
@@ -138,16 +111,16 @@ bool profile_conv_bwd_weight_impl(int do_verification,
|
||||
ref_invoker.Run(ref_argument);
|
||||
}
|
||||
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceConvBwdWeight<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp>;
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceGroupedConvBwdWeight<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
@@ -163,22 +136,41 @@ bool profile_conv_bwd_weight_impl(int do_verification,
|
||||
// profile device Conv instances
|
||||
bool all_pass = true;
|
||||
|
||||
std::array<ck::index_t, NDimSpatial> input_spatial_lengths{};
|
||||
std::array<ck::index_t, NDimSpatial> filter_spatial_lengths{};
|
||||
std::array<ck::index_t, NDimSpatial> output_spatial_lengths{};
|
||||
std::array<ck::index_t, NDimSpatial> conv_filter_strides{};
|
||||
std::array<ck::index_t, NDimSpatial> conv_filter_dilations{};
|
||||
std::array<ck::index_t, NDimSpatial> input_left_pads{};
|
||||
std::array<ck::index_t, NDimSpatial> input_right_pads{};
|
||||
|
||||
auto range_copy = [](const auto& from, auto to) { std::copy(begin(from), end(from), to); };
|
||||
|
||||
range_copy(conv_param.input_spatial_lengths_, begin(input_spatial_lengths));
|
||||
range_copy(conv_param.filter_spatial_lengths_, begin(filter_spatial_lengths));
|
||||
range_copy(conv_param.output_spatial_lengths_, begin(output_spatial_lengths));
|
||||
range_copy(conv_param.conv_filter_strides_, begin(conv_filter_strides));
|
||||
range_copy(conv_param.conv_filter_dilations_, begin(conv_filter_dilations));
|
||||
range_copy(conv_param.input_left_pads_, begin(input_left_pads));
|
||||
range_copy(conv_param.input_right_pads_, begin(input_right_pads));
|
||||
|
||||
for(auto& op_ptr : op_ptrs)
|
||||
{
|
||||
auto argument_ptr =
|
||||
op_ptr->MakeArgumentPointer(static_cast<InDataType*>(in_device_buf.GetDeviceBuffer()),
|
||||
static_cast<WeiDataType*>(wei_device_buf.GetDeviceBuffer()),
|
||||
static_cast<OutDataType*>(out_device_buf.GetDeviceBuffer()),
|
||||
conv_param.G_,
|
||||
conv_param.N_,
|
||||
conv_param.K_,
|
||||
conv_param.C_,
|
||||
conv_param.input_spatial_lengths_,
|
||||
conv_param.filter_spatial_lengths_,
|
||||
conv_param.output_spatial_lengths_,
|
||||
conv_param.conv_filter_strides_,
|
||||
conv_param.conv_filter_dilations_,
|
||||
conv_param.input_left_pads_,
|
||||
conv_param.input_right_pads_,
|
||||
input_spatial_lengths,
|
||||
filter_spatial_lengths,
|
||||
output_spatial_lengths,
|
||||
conv_filter_strides,
|
||||
conv_filter_dilations,
|
||||
input_left_pads,
|
||||
input_right_pads,
|
||||
in_element_op,
|
||||
wei_element_op,
|
||||
out_element_op,
|
||||
@@ -218,32 +210,29 @@ bool profile_conv_bwd_weight_impl(int do_verification,
|
||||
wei_device_buf.FromDevice(weight_device_result.mData.data());
|
||||
|
||||
bool pass =
|
||||
ck::utils::check_err(weight_host_result.mData, weight_device_result.mData);
|
||||
ck::utils::check_err(weight_device_result.mData, weight_host_result.mData);
|
||||
|
||||
if(!pass)
|
||||
{
|
||||
std::cout << "Fail info:" << op_ptr->GetTypeString() << std::endl;
|
||||
std::cout << "Fail info: " << op_ptr->GetTypeString() << std::endl;
|
||||
}
|
||||
|
||||
all_pass &= pass;
|
||||
|
||||
if(do_log)
|
||||
{
|
||||
std::cout << "in : ";
|
||||
show_data_nhwc_layout(output);
|
||||
std::cout << std::endl;
|
||||
|
||||
std::cout << "wei: ";
|
||||
show_data_nhwc_layout(weight_host_result);
|
||||
std::cout << std::endl;
|
||||
|
||||
std::cout << "out : ";
|
||||
show_data_nhwc_layout(input);
|
||||
std::cout << std::endl;
|
||||
|
||||
std::cout << "wei_device: ";
|
||||
show_data_nhwc_layout(weight_device_result);
|
||||
std::cout << std::endl;
|
||||
LogRangeAsType<float>(std::cout << "output : ", output.mData, ",") << std::endl;
|
||||
;
|
||||
LogRangeAsType<float>(
|
||||
std::cout << "weight (device): ", weight_device_result.mData, ",")
|
||||
<< std::endl;
|
||||
;
|
||||
LogRangeAsType<float>(
|
||||
std::cout << "weight (host): ", weight_host_result.mData, ",")
|
||||
<< std::endl;
|
||||
;
|
||||
LogRangeAsType<float>(std::cout << "input: ", input.mData, ",") << std::endl;
|
||||
;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,19 +1,19 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <cstdlib>
|
||||
#include <initializer_list>
|
||||
#include <iostream>
|
||||
#include <numeric>
|
||||
#include <initializer_list>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "profiler/include/profile_conv_bwd_weight_impl.hpp"
|
||||
#include "profiler/include/profile_grouped_conv_bwd_weight_impl.hpp"
|
||||
|
||||
namespace {
|
||||
|
||||
enum struct ConvLayout
|
||||
{
|
||||
NCHW_KCYX_NKHW, // 0
|
||||
NHWC_KYXC_NHWK, // 1
|
||||
GNCHW_GKCYX_GNKHW, // 0
|
||||
GNHWC_GKYXC_GNHWK, // 1
|
||||
};
|
||||
|
||||
enum struct ConvDataType
|
||||
@@ -25,24 +25,25 @@ enum struct ConvDataType
|
||||
|
||||
static void print_helper_msg()
|
||||
{
|
||||
std::cout
|
||||
<< "arg1: tensor operation (conv_bwd_weight: Convolution Backward Weight\n"
|
||||
<< "arg2: data type (0: Input fp32, Weight fp32, Output fp32\n"
|
||||
<< " 1: Input fp16, Weight fp16, Output fp16\n"
|
||||
<< " 2: Input bf16, Weight fp32, Output bf16)\n"
|
||||
<< "arg3: tensor layout (0: Input[N, C, Hi, Wi], Weight[K, C, Y, X], Output[N, K, Ho, Wo]\n"
|
||||
<< " 1: Input[N, Hi, Wi, C], Weight[K, Y, X, C], Output[N, Ho, Wo, K]\n"
|
||||
<< "arg4: verification (0: no, 1: yes)\n"
|
||||
<< "arg5: initialization (0: no init, 1: integer value, 2: decimal value)\n"
|
||||
<< "arg6: print tensor value (0: no; 1: yes)\n"
|
||||
<< "arg7: time kernel (0: no, 1: yes)\n"
|
||||
<< ck::utils::conv::get_conv_param_parser_helper_msg() << " SplitK\n"
|
||||
<< std::endl;
|
||||
std::cout << "arg1: tensor operation (conv_bwd_weight: Convolution Backward Weight\n"
|
||||
<< "arg2: data type (0: Input fp32, Weight fp32, Output fp32\n"
|
||||
<< " 1: Input fp16, Weight fp16, Output fp16\n"
|
||||
<< " 2: Input bf16, Weight fp32, Output bf16)\n"
|
||||
<< "arg3: tensor layout (0: Input[G, N, C, Hi, Wi], Weight[G, K, C, Y, X], Output[G, "
|
||||
"N, K, Ho, Wo]\n"
|
||||
<< " 1: Input[G, N, Hi, Wi, C], Weight[G, K, Y, X, C], Output[G, "
|
||||
"N, Ho, Wo, K]\n"
|
||||
<< "arg4: verification (0: no, 1: yes)\n"
|
||||
<< "arg5: initialization (0: no init, 1: integer value, 2: decimal value)\n"
|
||||
<< "arg6: print tensor value (0: no; 1: yes)\n"
|
||||
<< "arg7: time kernel (0: no, 1: yes)\n"
|
||||
<< ck::utils::conv::get_conv_param_parser_helper_msg() << " SplitK\n"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int profile_conv_bwd_weight(int argc, char* argv[])
|
||||
int profile_grouped_conv_bwd_weight(int argc, char* argv[])
|
||||
{
|
||||
// 8 for control, 1 for num_dim_spatial
|
||||
if(argc < 9)
|
||||
@@ -75,17 +76,17 @@ int profile_conv_bwd_weight(int argc, char* argv[])
|
||||
using F16 = ck::half_t;
|
||||
using BF16 = ck::bhalf_t;
|
||||
|
||||
using NWC = ck::tensor_layout::convolution::NWC;
|
||||
using NHWC = ck::tensor_layout::convolution::NHWC;
|
||||
using NDHWC = ck::tensor_layout::convolution::NDHWC;
|
||||
using GNWC = ck::tensor_layout::convolution::GNWC;
|
||||
using GNHWC = ck::tensor_layout::convolution::GNHWC;
|
||||
using GNDHWC = ck::tensor_layout::convolution::GNDHWC;
|
||||
|
||||
using KXC = ck::tensor_layout::convolution::KXC;
|
||||
using KYXC = ck::tensor_layout::convolution::KYXC;
|
||||
using KZYXC = ck::tensor_layout::convolution::KZYXC;
|
||||
using GKXC = ck::tensor_layout::convolution::GKXC;
|
||||
using GKYXC = ck::tensor_layout::convolution::GKYXC;
|
||||
using GKZYXC = ck::tensor_layout::convolution::GKZYXC;
|
||||
|
||||
using NWK = ck::tensor_layout::convolution::NWK;
|
||||
using NHWK = ck::tensor_layout::convolution::NHWK;
|
||||
using NDHWK = ck::tensor_layout::convolution::NDHWK;
|
||||
using GNWK = ck::tensor_layout::convolution::GNWK;
|
||||
using GNHWK = ck::tensor_layout::convolution::GNHWK;
|
||||
using GNDHWK = ck::tensor_layout::convolution::GNDHWK;
|
||||
|
||||
constexpr auto I1 = ck::Number<1>{};
|
||||
constexpr auto I2 = ck::Number<2>{};
|
||||
@@ -108,64 +109,64 @@ int profile_conv_bwd_weight(int argc, char* argv[])
|
||||
using WeiDataType = decltype(wei_type);
|
||||
using OutDataType = decltype(out_type);
|
||||
|
||||
bool pass = ck::profiler::profile_conv_bwd_weight_impl<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
OutDataType>(
|
||||
bool pass = ck::profiler::profile_grouped_conv_bwd_weight_impl<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
OutDataType>(
|
||||
do_verification, init_method, do_log, time_kernel, params, split_k);
|
||||
|
||||
return pass ? 0 : 1;
|
||||
};
|
||||
|
||||
if(num_dim_spatial == 1 && layout == ConvLayout::NHWC_KYXC_NHWK)
|
||||
if(num_dim_spatial == 1 && layout == ConvLayout::GNHWC_GKYXC_GNHWK)
|
||||
{
|
||||
if(data_type == ConvDataType::F32_F32_F32)
|
||||
{
|
||||
return profile(I1, NWC{}, KXC{}, NWK{}, F32{}, F32{}, F32{});
|
||||
return profile(I1, GNWC{}, GKXC{}, GNWK{}, F32{}, F32{}, F32{});
|
||||
}
|
||||
else if(data_type == ConvDataType::F16_F16_F16)
|
||||
{
|
||||
return profile(I1, NWC{}, KXC{}, NWK{}, F16{}, F16{}, F16{});
|
||||
return profile(I1, GNWC{}, GKXC{}, GNWK{}, F16{}, F16{}, F16{});
|
||||
}
|
||||
else if(data_type == ConvDataType::BF16_F32_BF16)
|
||||
{
|
||||
// fp32 atomic add is used for weight tensor in bf16 kernel
|
||||
return profile(I1, NWC{}, KXC{}, NWK{}, BF16{}, F32{}, BF16{});
|
||||
return profile(I1, GNWC{}, GKXC{}, GNWK{}, BF16{}, F32{}, BF16{});
|
||||
}
|
||||
}
|
||||
else if(num_dim_spatial == 2 && layout == ConvLayout::NHWC_KYXC_NHWK)
|
||||
else if(num_dim_spatial == 2 && layout == ConvLayout::GNHWC_GKYXC_GNHWK)
|
||||
{
|
||||
if(data_type == ConvDataType::F32_F32_F32)
|
||||
{
|
||||
return profile(I2, NHWC{}, KYXC{}, NHWK{}, F32{}, F32{}, F32{});
|
||||
return profile(I2, GNHWC{}, GKYXC{}, GNHWK{}, F32{}, F32{}, F32{});
|
||||
}
|
||||
else if(data_type == ConvDataType::F16_F16_F16)
|
||||
{
|
||||
return profile(I2, NHWC{}, KYXC{}, NHWK{}, F16{}, F16{}, F16{});
|
||||
return profile(I2, GNHWC{}, GKYXC{}, GNHWK{}, F16{}, F16{}, F16{});
|
||||
}
|
||||
else if(data_type == ConvDataType::BF16_F32_BF16)
|
||||
{
|
||||
// fp32 atomic add is used for weight tensor in bf16 kernel
|
||||
return profile(I2, NHWC{}, KYXC{}, NHWK{}, BF16{}, F32{}, BF16{});
|
||||
return profile(I2, GNHWC{}, GKYXC{}, GNHWK{}, BF16{}, F32{}, BF16{});
|
||||
}
|
||||
}
|
||||
else if(num_dim_spatial == 3 && layout == ConvLayout::NHWC_KYXC_NHWK)
|
||||
else if(num_dim_spatial == 3 && layout == ConvLayout::GNHWC_GKYXC_GNHWK)
|
||||
{
|
||||
if(data_type == ConvDataType::F32_F32_F32)
|
||||
{
|
||||
return profile(I3, NDHWC{}, KZYXC{}, NDHWK{}, F32{}, F32{}, F32{});
|
||||
return profile(I3, GNDHWC{}, GKZYXC{}, GNDHWK{}, F32{}, F32{}, F32{});
|
||||
}
|
||||
else if(data_type == ConvDataType::F16_F16_F16)
|
||||
{
|
||||
return profile(I3, NDHWC{}, KZYXC{}, NDHWK{}, F16{}, F16{}, F16{});
|
||||
return profile(I3, GNDHWC{}, GKZYXC{}, GNDHWK{}, F16{}, F16{}, F16{});
|
||||
}
|
||||
else if(data_type == ConvDataType::BF16_F32_BF16)
|
||||
{
|
||||
// fp32 atomic add is used for weight tensor in bf16 kernel
|
||||
return profile(I3, NDHWC{}, KZYXC{}, NDHWK{}, BF16{}, F32{}, BF16{});
|
||||
return profile(I3, GNDHWC{}, GKZYXC{}, GNDHWK{}, BF16{}, F32{}, BF16{});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,8 +18,8 @@ int profile_conv_fwd(int, char*[]);
|
||||
int profile_conv_fwd_bias_relu(int, char*[]);
|
||||
int profile_conv_fwd_bias_relu_add(int, char*[]);
|
||||
int profile_conv_bwd_data(int, char*[]);
|
||||
int profile_conv_bwd_weight(int, char*[]);
|
||||
int profile_grouped_conv_fwd(int, char*[]);
|
||||
int profile_grouped_conv_bwd_weight(int, char*[]);
|
||||
int profile_softmax(int, char*[]);
|
||||
int profile_layernorm(int, char*[]);
|
||||
int profile_groupnorm(int, char*[]);
|
||||
@@ -43,8 +43,8 @@ static void print_helper_message()
|
||||
" conv_fwd_bias_relu: ForwardConvolution+Bias+ReLU\n"
|
||||
" conv_fwd_bias_relu_add: ForwardConvolution+Bias+ReLU+Add\n"
|
||||
" conv_bwd_data: Convolution Backward Data\n"
|
||||
" conv_bwd_weight: Convolution Backward Weight\n"
|
||||
" grouped_conv_fwd: Grouped Convolution Forward\n"
|
||||
" grouped_conv_bwd_weight: Grouped Convolution Backward Weight\n"
|
||||
" softmax: Softmax\n"
|
||||
" reduce: Reduce\n");
|
||||
// clang-format on
|
||||
@@ -118,14 +118,14 @@ int main(int argc, char* argv[])
|
||||
{
|
||||
return profile_conv_bwd_data(argc, argv);
|
||||
}
|
||||
else if(strcmp(argv[1], "conv_bwd_weight") == 0)
|
||||
{
|
||||
return profile_conv_bwd_weight(argc, argv);
|
||||
}
|
||||
else if(strcmp(argv[1], "grouped_conv_fwd") == 0)
|
||||
{
|
||||
return profile_grouped_conv_fwd(argc, argv);
|
||||
}
|
||||
else if(strcmp(argv[1], "conv_bwd_weight") == 0)
|
||||
{
|
||||
return profile_grouped_conv_bwd_weight(argc, argv);
|
||||
}
|
||||
else if(strcmp(argv[1], "reduce") == 0)
|
||||
{
|
||||
return profile_reduce(argc, argv);
|
||||
|
||||
Reference in New Issue
Block a user