NHWC Conv2d Bwd weight fp16 ckprofiler and test (#166)

* change backward weight name

* start add bwd weight lib and profiler

* change tuning parameter

* change output info

* add bwd weight test

* change test info

* using conv_util

* change wgt to weight

* add }

* add fp32

[ROCm/composable_kernel commit: 781cacd2e6]
This commit is contained in:
ltqin
2022-04-05 09:32:00 +08:00
committed by GitHub
parent 573f1de6fa
commit f3eb4639a8
19 changed files with 814 additions and 42 deletions

View File

@@ -0,0 +1 @@
add_example_executable(example_conv2d_bwd_weight_xdl conv2d_bwd_weight_xdl.cpp)

View File

@@ -1,13 +1,13 @@
# Instructions for ```example_conv2d_wrw_xdl``` Example
# Instructions for ```example_conv2d_bwd_weight_xdl``` Example
## Run ```example_conv2d_wrw_xdl```
## Run ```example_conv2d_bwd_weight_xdl```
```bash
#arg1: verification (0=no, 1=yes)
#arg2: initialization (0=no init, 1=integer value, 2=decimal value)
#arg3: run kernel # of times (>1)
#arg4: print log (0=no, 1=yes)
#arg5 to 20: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, RightPx, split-k
./example/conv2d_fwd_xdl 0 1 5 0 4
./bin/example_conv2d_bwd_weight_xdl 0 1 5 0 4
```
Result

View File

@@ -32,8 +32,8 @@ using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
// clang-format off
using DeviceConvWrWInstance = ck::tensor_operation::device::
DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K<
using DeviceConvBwdWeightInstance = ck::tensor_operation::device::
DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K<
InDataType, // InDataType
WeiDataType, // WeiDataType
OutDataType, // OutDataType
@@ -70,8 +70,8 @@ using DeviceConvWrWInstance = ck::tensor_operation::device::
8>; // CBlockTransferScalarPerVector_NWaveNPerXdl
// clang-format on
using ReferenceConvWrwInstance = ck::tensor_operation::host::
ReferenceConvWrw<InDataType, WeiDataType, OutDataType, InElementOp, WeiElementOp, OutElementOp>;
using ReferenceConvBwdWeightInstance = ck::tensor_operation::host::
ReferenceConvBwdWeight<InDataType, WeiDataType, OutDataType, InElementOp, WeiElementOp, OutElementOp>;
int main(int argc, char* argv[])
{
@@ -211,7 +211,7 @@ int main(int argc, char* argv[])
wei_device_buf.ToDevice(wei_k_c_y_x_device_result.mData.data());
// do GEMM
auto conv = DeviceConvWrWInstance{};
auto conv = DeviceConvBwdWeightInstance{};
auto invoker = conv.MakeInvoker();
auto argument = conv.MakeArgument(static_cast<InDataType*>(in_device_buf.GetDeviceBuffer()),
static_cast<WeiDataType*>(wei_device_buf.GetDeviceBuffer()),
@@ -256,7 +256,7 @@ int main(int argc, char* argv[])
if(do_verification)
{
auto ref_conv = ReferenceConvWrwInstance{};
auto ref_conv = ReferenceConvBwdWeightInstance{};
auto ref_invoker = ref_conv.MakeInvoker();
auto ref_argument = ref_conv.MakeArgument(in_n_c_hi_wi,

View File

@@ -1 +0,0 @@
add_example_executable(example_conv2d_bwd_wgt_xdl conv2d_bwd_wgt_xdl.cpp)

View File

@@ -35,7 +35,7 @@ add_subdirectory(07_conv2d_fwd_bias_relu_add)
add_subdirectory(08_conv3d_fwd)
add_subdirectory(09_convnd_fwd)
add_subdirectory(10_conv2d_bwd_data)
add_subdirectory(11_conv2d_bwd_wgt)
add_subdirectory(11_conv2d_bwd_weight)
add_subdirectory(12_reduce)
add_subdirectory(13_pool2d_fwd)
add_subdirectory(14_gemm_xdl_requant_relu_requant)