Mirror of https://github.com/ROCm/composable_kernel.git (synced 2026-05-14 02:02:46 +00:00)
[CK][Test] Moving device_op creation before data initialization.
Signed-off-by: Michal Kulikowski <Michal.Kulikowski@amd.com>
This commit is contained in:
Committed by: Michał Kulikowski
Parent: 29c4f868ef
Commit: 3c2c294ffb
@@ -99,6 +99,23 @@ bool profile_batched_contraction_multiple_d_impl(int do_verification,
|
||||
std::cout << "d_gs_ms_ns: " << d_gs_ms_ns.mDesc << std::endl;
|
||||
std::cout << "e_gs_ms_ns: " << e_gs_ms_ns_host_result.mDesc << std::endl;
|
||||
|
||||
// get device op instances
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceBatchedContractionMultipleD<NumDimG,
|
||||
NumDimM,
|
||||
NumDimN,
|
||||
NumDimK,
|
||||
ADataType,
|
||||
BDataType,
|
||||
DsDataType,
|
||||
EDataType,
|
||||
AElementOp,
|
||||
BElementOp,
|
||||
CDEElementOp>;
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
switch(init_method)
|
||||
{
|
||||
case 0: break;
|
||||
@@ -181,23 +198,6 @@ bool profile_batched_contraction_multiple_d_impl(int do_verification,
|
||||
}
|
||||
}
|
||||
|
||||
// get device op instances
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceBatchedContractionMultipleD<NumDimG,
|
||||
NumDimM,
|
||||
NumDimN,
|
||||
NumDimK,
|
||||
ADataType,
|
||||
BDataType,
|
||||
DsDataType,
|
||||
EDataType,
|
||||
AElementOp,
|
||||
BElementOp,
|
||||
CDEElementOp>;
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
std::string best_op_name;
|
||||
float best_ave_time = 0;
|
||||
float best_tflops = 0;
|
||||
|
||||
@@ -85,6 +85,12 @@ bool profile_batched_gemm_impl(int do_verification,
|
||||
std::cout << "b_g_k_n: " << b_g_k_n.mDesc << std::endl;
|
||||
std::cout << "c_g_m_n: " << c_g_m_n_host_result.mDesc << std::endl;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
switch(init_method)
|
||||
{
|
||||
case 0: break;
|
||||
@@ -129,12 +135,6 @@ bool profile_batched_gemm_impl(int do_verification,
|
||||
b_device_buf.ToDevice(b_g_k_n.mData.data());
|
||||
c_device_buf.ToDevice(c_g_m_n_device_result.mData.data());
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
std::string best_op_name;
|
||||
float best_ave_time = 0;
|
||||
float best_tflops = 0;
|
||||
|
||||
@@ -63,6 +63,27 @@ bool profile_batchnorm_backward_impl(bool do_verification,
|
||||
};
|
||||
}
|
||||
|
||||
using PassThroughOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
// add device batchnorm-backward instances
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceBatchNormBwd<XDataType,
|
||||
DxDataType,
|
||||
DxDataType,
|
||||
AccDataType,
|
||||
ScaleDataType,
|
||||
DscaleDbiasDataType,
|
||||
MeanVarDataType,
|
||||
PassThroughOp,
|
||||
Rank,
|
||||
NumBatchNormReduceDim>;
|
||||
|
||||
// get device op instances
|
||||
const auto instance_ptrs =
|
||||
ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "found " << instance_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
// input data of the batchnorm backward algorithm
|
||||
Tensor<XDataType> x(inOutLengths);
|
||||
Tensor<DyDataType> dy(inOutLengths);
|
||||
@@ -191,27 +212,6 @@ bool profile_batchnorm_backward_impl(bool do_verification,
|
||||
|
||||
std::copy(reduceDims.begin(), reduceDims.end(), arrReduceDims.begin());
|
||||
|
||||
using PassThroughOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
// add device batchnorm-backward instances
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceBatchNormBwd<XDataType,
|
||||
DxDataType,
|
||||
DxDataType,
|
||||
AccDataType,
|
||||
ScaleDataType,
|
||||
DscaleDbiasDataType,
|
||||
MeanVarDataType,
|
||||
PassThroughOp,
|
||||
Rank,
|
||||
NumBatchNormReduceDim>;
|
||||
|
||||
// get device op instances
|
||||
const auto instance_ptrs =
|
||||
ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "found " << instance_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
std::string best_instance_name;
|
||||
float best_avg_time = std::numeric_limits<float>::max();
|
||||
float best_gb_per_sec = 0;
|
||||
|
||||
@@ -111,6 +111,23 @@ bool profile_conv_bwd_data_impl(int do_verification,
|
||||
std::cout << "weight: " << weight.mDesc << std::endl;
|
||||
std::cout << "output: " << output.mDesc << std::endl;
|
||||
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceConvBwdData<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
switch(init_method)
|
||||
{
|
||||
case 0: break;
|
||||
@@ -179,23 +196,6 @@ bool profile_conv_bwd_data_impl(int do_verification,
|
||||
gpu_ref_in_dev.FromDevice(gpu_ref_input.mData.data());
|
||||
}
|
||||
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceConvBwdData<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
std::string best_op_name;
|
||||
float best_avg_time = 0;
|
||||
float best_tflops = 0;
|
||||
|
||||
@@ -89,6 +89,23 @@ bool profile_conv_fwd_impl(int do_verification,
|
||||
std::cout << "weight: " << weight.mDesc << std::endl;
|
||||
std::cout << "output: " << host_output.mDesc << std::endl;
|
||||
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceConvFwd<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
switch(init_method)
|
||||
{
|
||||
case 0: break;
|
||||
@@ -158,23 +175,6 @@ bool profile_conv_fwd_impl(int do_verification,
|
||||
gpu_ref_out_dev.FromDevice(gpu_ref_output.mData.data());
|
||||
}
|
||||
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceConvFwd<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
std::string best_op_name;
|
||||
float best_avg_time = 0;
|
||||
float best_tflops = 0;
|
||||
|
||||
@@ -115,6 +115,28 @@ bool profile_grouped_conv_bwd_data_impl(int do_verification,
|
||||
std::cout << "wei: " << wei_g_k_c_xs_desc << std::endl;
|
||||
std::cout << "in: " << in_g_n_c_wis_desc << std::endl;
|
||||
|
||||
using DeviceOp =
|
||||
ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD<NDimSpatial,
|
||||
OutLayout,
|
||||
WeiLayout,
|
||||
ck::Tuple<>,
|
||||
InLayout,
|
||||
OutDataType,
|
||||
WeiDataType,
|
||||
ck::Tuple<>,
|
||||
InDataType,
|
||||
OutElementOp,
|
||||
WeiElementOp,
|
||||
InElementOp,
|
||||
ComputeDataType,
|
||||
ComputeDataType>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
// Create host tensors
|
||||
Tensor<OutDataType> out(out_g_n_k_wos_desc);
|
||||
Tensor<WeiDataType> wei(wei_g_k_c_xs_desc);
|
||||
@@ -438,26 +460,6 @@ bool profile_grouped_conv_bwd_data_impl(int do_verification,
|
||||
};
|
||||
|
||||
// do GEMM
|
||||
using DeviceOp =
|
||||
ck::tensor_operation::device::DeviceGroupedConvBwdDataMultipleD<NDimSpatial,
|
||||
OutLayout,
|
||||
WeiLayout,
|
||||
ck::Tuple<>,
|
||||
InLayout,
|
||||
OutDataType,
|
||||
WeiDataType,
|
||||
ck::Tuple<>,
|
||||
InDataType,
|
||||
OutElementOp,
|
||||
WeiElementOp,
|
||||
InElementOp,
|
||||
ComputeDataType,
|
||||
ComputeDataType>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::array<ck::index_t, NDimSpatial + 3> out_lengths{};
|
||||
std::array<ck::index_t, NDimSpatial + 3> out_strides{};
|
||||
std::array<ck::index_t, NDimSpatial + 3> wei_lengths{};
|
||||
|
||||
@@ -118,6 +118,25 @@ bool profile_grouped_conv_bwd_weight_impl(int do_verification,
|
||||
std::cout << "weight: " << wei_g_k_c_xs_desc << std::endl;
|
||||
std::cout << "output: " << out_g_n_k_wos_desc << std::endl;
|
||||
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceGroupedConvBwdWeight<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
ComputeTypeA,
|
||||
ComputeTypeB>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
// Create host tensors
|
||||
Tensor<InDataType> input(in_g_n_c_wis_desc);
|
||||
Tensor<WeiDataType> weight_host_result(wei_g_k_c_xs_desc);
|
||||
@@ -244,25 +263,6 @@ bool profile_grouped_conv_bwd_weight_impl(int do_verification,
|
||||
}
|
||||
}
|
||||
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceGroupedConvBwdWeight<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
ComputeTypeA,
|
||||
ComputeTypeB>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
std::string best_op_name;
|
||||
float best_avg_time = 0;
|
||||
float best_tflops = 0;
|
||||
|
||||
@@ -129,6 +129,28 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
|
||||
std::cout << "output: " << host_output.mDesc << std::endl;
|
||||
std::cout << "bias: " << bias.mDesc << std::endl;
|
||||
|
||||
using DeviceOp =
|
||||
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleABD<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
ck::Tuple<OutLayout>,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
ck::Tuple<OutDataType>,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
AComputeType,
|
||||
BComputeType>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "ckProfiler found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
switch(init_method)
|
||||
{
|
||||
case 0: break;
|
||||
@@ -339,28 +361,6 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
|
||||
}
|
||||
};
|
||||
|
||||
using DeviceOp =
|
||||
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleABD<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
ck::Tuple<OutLayout>,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
ck::Tuple<OutDataType>,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
AComputeType,
|
||||
BComputeType>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "ckProfiler found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
if constexpr(BiasGK)
|
||||
{
|
||||
constexpr ck::index_t spatial_offset = 3;
|
||||
|
||||
@@ -107,6 +107,27 @@ bool profile_grouped_conv_fwd_bilinear_impl(
|
||||
std::cout << "d_tensor: " << d_tensor.mDesc << std::endl;
|
||||
std::cout << "output: " << host_output.mDesc << std::endl;
|
||||
|
||||
using DeviceOp =
|
||||
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleABD<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
ck::Tuple<DLayout>,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
ck::Tuple<DDataType>,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
AComputeType,
|
||||
BComputeType>;
|
||||
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
switch(init_method)
|
||||
{
|
||||
case 0: break;
|
||||
@@ -231,27 +252,6 @@ bool profile_grouped_conv_fwd_bilinear_impl(
|
||||
float best_gb_per_sec = 0;
|
||||
int valids = 0;
|
||||
|
||||
using DeviceOp =
|
||||
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleABD<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
ck::Tuple<DLayout>,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
ck::Tuple<DDataType>,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
AComputeType,
|
||||
BComputeType>;
|
||||
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
for(std::size_t i = 0; i < op_ptrs.size(); ++i)
|
||||
{
|
||||
auto& op_ptr = op_ptrs[i];
|
||||
|
||||
@@ -145,6 +145,27 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
|
||||
std::cout << "weight: " << wei_g_k_c_xs_desc << std::endl;
|
||||
std::cout << "output: " << out_g_n_k_wos_desc << std::endl;
|
||||
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceGroupedConvFwdMultipleABD<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
ck::Tuple<>,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
ck::Tuple<>,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
AComputeType,
|
||||
BComputeType>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "ckProfiler found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
// Create host tensors
|
||||
Tensor<InDataType> input(in_g_n_c_wis_desc);
|
||||
Tensor<WeiDataType> weight(wei_g_k_c_xs_desc);
|
||||
@@ -411,27 +432,6 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
|
||||
}
|
||||
};
|
||||
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceGroupedConvFwdMultipleABD<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
ck::Tuple<>,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
ck::Tuple<>,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
AComputeType,
|
||||
BComputeType>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "ckProfiler found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
if(list_instances)
|
||||
{
|
||||
std::cout << "\nValid instances for this problem:" << std::endl;
|
||||
|
||||
@@ -151,6 +151,27 @@ bool profile_grouped_conv_fwd_outelementop_impl(int do_verification,
|
||||
std::cout << "scale_wei: " << scale_wei << std::endl;
|
||||
std::cout << "scale_out: " << scale_out << std::endl;
|
||||
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceGroupedConvFwdMultipleABD<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
ck::Tuple<>,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
ck::Tuple<>,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
AComputeType,
|
||||
BComputeType>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "ckProfiler found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
// run reference op
|
||||
if(do_verification == 1)
|
||||
{
|
||||
@@ -340,27 +361,6 @@ bool profile_grouped_conv_fwd_outelementop_impl(int do_verification,
|
||||
}
|
||||
};
|
||||
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceGroupedConvFwdMultipleABD<NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
ck::Tuple<>,
|
||||
OutLayout,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
ck::Tuple<>,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
AComputeType,
|
||||
BComputeType>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "ckProfiler found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
for(auto& op_ptr : op_ptrs)
|
||||
{
|
||||
auto argument_ptr = op_ptr->MakeArgumentPointer(in_device_buf.GetDeviceBuffer(),
|
||||
|
||||
@@ -140,6 +140,25 @@ class TestGroupedConvndBwdWeight : public ::testing::Test
|
||||
std::cout << "wei: " << wei_host.mDesc << std::endl;
|
||||
std::cout << "out: " << out.mDesc << std::endl;
|
||||
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceGroupedConvBwdWeightMultipleD<
|
||||
NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
OutLayout,
|
||||
ck::Tuple<WeiLayout>,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
OutDataType,
|
||||
ck::Tuple<WeiDataType>,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
in.GenerateTensorValue(GeneratorTensor_2<InDataType>{-5, 5});
|
||||
out.GenerateTensorValue(GeneratorTensor_2<OutDataType>{-5, 5});
|
||||
d.GenerateTensorValue(GeneratorTensor_2<WeiDataType>{-5, 5});
|
||||
@@ -179,23 +198,6 @@ class TestGroupedConvndBwdWeight : public ::testing::Test
|
||||
|
||||
RunReference(conv_param, in, wei_host, out, d);
|
||||
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceGroupedConvBwdWeightMultipleD<
|
||||
NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
OutLayout,
|
||||
ck::Tuple<WeiLayout>,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
OutDataType,
|
||||
ck::Tuple<WeiDataType>,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
int num_kernel = 0;
|
||||
|
||||
for(std::size_t i = 0; i < op_ptrs.size(); ++i)
|
||||
|
||||
@@ -104,6 +104,27 @@ bool profile_grouped_conv_fwd_scaleadd_ab_impl(int do_verification,
|
||||
std::cout << "weight: " << weight.mDesc << std::endl;
|
||||
std::cout << "output: " << host_output.mDesc << std::endl;
|
||||
|
||||
// InDataType and WeiDataType must be tuple, inLayout and weiLayout are single.
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceGroupedConvFwdMultipleABD<
|
||||
NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
ck::Tuple<>,
|
||||
OutLayout,
|
||||
ck::Tuple<InDataType, InDataType>,
|
||||
ck::Tuple<WeiDataType, WeiDataType>,
|
||||
ck::Tuple<>,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "ckProfiler found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
switch(init_method)
|
||||
{
|
||||
case 0: break;
|
||||
@@ -246,27 +267,6 @@ bool profile_grouped_conv_fwd_scaleadd_ab_impl(int do_verification,
|
||||
}
|
||||
};
|
||||
|
||||
// InDataType and WeiDataType must be tuple, inLayout and weiLayout are single.
|
||||
using DeviceOp = ck::tensor_operation::device::DeviceGroupedConvFwdMultipleABD<
|
||||
NDimSpatial,
|
||||
InLayout,
|
||||
WeiLayout,
|
||||
ck::Tuple<>,
|
||||
OutLayout,
|
||||
ck::Tuple<InDataType, InDataType>,
|
||||
ck::Tuple<WeiDataType, WeiDataType>,
|
||||
ck::Tuple<>,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp>;
|
||||
|
||||
// get device op instances
|
||||
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
|
||||
DeviceOp>::GetInstances();
|
||||
|
||||
std::cout << "ckProfiler found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
std::array<const void*, NumAs> as{in_device_buf.GetDeviceBuffer(),
|
||||
in_bias_device_buf.GetDeviceBuffer()};
|
||||
std::array<const void*, NumBs> bs{wei_device_buf.GetDeviceBuffer(),
|
||||
|
||||
Reference in New Issue
Block a user