Merge commit 'e7906dd644edfadcb6219b5f7f60d3e0d3a7301f' into develop

This commit is contained in:
assistant-librarian[bot]
2025-05-28 23:06:50 +00:00
parent c3d1cb9ba2
commit 6214352555
34 changed files with 291 additions and 195 deletions

View File

@@ -11,7 +11,7 @@
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_bias_relu.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_bias_clamp.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
@@ -35,19 +35,22 @@ template <ck::index_t NDimSpatial,
typename AComputeType = InDataType,
typename BComputeType = AComputeType,
typename IndexType = ck::index_t>
bool profile_grouped_conv_fwd_bias_relu_impl(int do_verification,
int init_method,
bool do_log,
bool time_kernel,
const ck::utils::conv::ConvParam& conv_param)
bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
int init_method,
bool do_log,
bool time_kernel,
const ck::utils::conv::ConvParam& conv_param)
{
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::AddRelu;
using OutElementOp = ck::tensor_operation::element_wise::AddClamp;
const float floor = 0.f;
const float ceil = 256.f;
const auto in_element_op = InElementOp{};
const auto wei_element_op = WeiElementOp{};
const auto out_element_op = OutElementOp{};
const auto out_element_op = OutElementOp{floor, ceil};
const auto in_g_n_c_wis_desc =
ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(conv_param);