mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 10:09:41 +00:00
NHWC Conv2d Bwd weight fp16 ckprofiler and test (#166)
* change backward weight name
* start adding bwd weight lib and profiler
* change tuning parameter
* change output info
* add bwd weight test
* change test info
* using conv_util
* change wgt to weight
* add }
* add fp32
[ROCm/composable_kernel commit: 781cacd2e6]
This commit is contained in:
1
example/11_conv2d_bwd_weight/CMakeLists.txt
Normal file
1
example/11_conv2d_bwd_weight/CMakeLists.txt
Normal file
@@ -0,0 +1 @@
|
||||
add_example_executable(example_conv2d_bwd_weight_xdl conv2d_bwd_weight_xdl.cpp)
|
||||
@@ -1,13 +1,13 @@
|
||||
# Instructions for ```example_conv2d_wrw_xdl``` Example
|
||||
# Instructions for ```example_conv2d_bwd_weight_xdl``` Example
|
||||
|
||||
## Run ```example_conv2d_wrw_xdl```
|
||||
## Run ```example_conv2d_bwd_weight_xdl```
|
||||
```bash
|
||||
#arg1: verification (0=no, 1=yes)
|
||||
#arg2: initialization (0=no init, 1=integer value, 2=decimal value)
|
||||
#arg3: run kernel # of times (>1)
|
||||
#arg4: show log (0=no, 1=yes)
|
||||
#arg5 to 19: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, RightPx, split-k
|
||||
./example/conv2d_fwd_xdl 0 1 5 0 4
|
||||
./bin/example_conv2d_bwd_weight_xdl 0 1 5 0 4
|
||||
```
|
||||
|
||||
Result
|
||||
@@ -32,8 +32,8 @@ using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
// clang-format off
|
||||
using DeviceConvWrWInstance = ck::tensor_operation::device::
|
||||
DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K<
|
||||
using DeviceConvBwdWeightInstance = ck::tensor_operation::device::
|
||||
DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K<
|
||||
InDataType, // InDataType
|
||||
WeiDataType, // WeiDataType
|
||||
OutDataType, // OutDataType
|
||||
@@ -70,8 +70,8 @@ using DeviceConvWrWInstance = ck::tensor_operation::device::
|
||||
8>; // CBlockTransferScalarPerVector_NWaveNPerXdl
|
||||
// clang-format on
|
||||
|
||||
using ReferenceConvWrwInstance = ck::tensor_operation::host::
|
||||
ReferenceConvWrw<InDataType, WeiDataType, OutDataType, InElementOp, WeiElementOp, OutElementOp>;
|
||||
using ReferenceConvBwdWeightInstance = ck::tensor_operation::host::
|
||||
ReferenceConvBwdWeight<InDataType, WeiDataType, OutDataType, InElementOp, WeiElementOp, OutElementOp>;
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
@@ -211,7 +211,7 @@ int main(int argc, char* argv[])
|
||||
wei_device_buf.ToDevice(wei_k_c_y_x_device_result.mData.data());
|
||||
|
||||
// do GEMM
|
||||
auto conv = DeviceConvWrWInstance{};
|
||||
auto conv = DeviceConvBwdWeightInstance{};
|
||||
auto invoker = conv.MakeInvoker();
|
||||
auto argument = conv.MakeArgument(static_cast<InDataType*>(in_device_buf.GetDeviceBuffer()),
|
||||
static_cast<WeiDataType*>(wei_device_buf.GetDeviceBuffer()),
|
||||
@@ -256,7 +256,7 @@ int main(int argc, char* argv[])
|
||||
|
||||
if(do_verification)
|
||||
{
|
||||
auto ref_conv = ReferenceConvWrwInstance{};
|
||||
auto ref_conv = ReferenceConvBwdWeightInstance{};
|
||||
auto ref_invoker = ref_conv.MakeInvoker();
|
||||
|
||||
auto ref_argument = ref_conv.MakeArgument(in_n_c_hi_wi,
|
||||
@@ -1 +0,0 @@
|
||||
add_example_executable(example_conv2d_bwd_wgt_xdl conv2d_bwd_wgt_xdl.cpp)
|
||||
@@ -35,7 +35,7 @@ add_subdirectory(07_conv2d_fwd_bias_relu_add)
|
||||
add_subdirectory(08_conv3d_fwd)
|
||||
add_subdirectory(09_convnd_fwd)
|
||||
add_subdirectory(10_conv2d_bwd_data)
|
||||
add_subdirectory(11_conv2d_bwd_wgt)
|
||||
add_subdirectory(11_conv2d_bwd_weight)
|
||||
add_subdirectory(12_reduce)
|
||||
add_subdirectory(13_pool2d_fwd)
|
||||
add_subdirectory(14_gemm_xdl_requant_relu_requant)
|
||||
|
||||
Reference in New Issue
Block a user