remove dead code

[ROCm/composable_kernel commit: 4a99f54c31]
This commit is contained in:
Chao Liu
2019-05-02 11:09:42 -05:00
parent 537c363633
commit eb289d06a3
8 changed files with 289 additions and 669 deletions

View File

@@ -1,5 +1,6 @@
#pragma once
#include "ConstantTensorDescriptor.hip.hpp"
#include "threadwise_nd_tensor_op.hip.hpp"
// optimized for the scenario where p_in, p_wei and p_out are in registers
template <class TInWei, class TOut, class InDesc, class WeiDesc, class OutDesc>
@@ -84,10 +85,12 @@ __device__ void threadwise_direct_convolution_2(InDesc,
TInWei p_wei_reg[wei_reg_desc.GetElementSpace()];
// copy input tensor into register
threadwise_4d_tensor_copy(in_desc, p_in, in_reg_desc, p_in_reg, in_reg_desc.GetLengths());
threadwise_nd_tensor_copy(
in_desc, p_in, in_reg_desc, p_in_reg, in_reg_desc.GetLengths(), Number<1>{});
// copy weight tensor into register
threadwise_4d_tensor_copy(wei_desc, p_wei, wei_reg_desc, p_wei_reg, wei_reg_desc.GetLengths());
threadwise_nd_tensor_copy(
wei_desc, p_wei, wei_reg_desc, p_wei_reg, wei_reg_desc.GetLengths(), Number<1>{});
// do convolution
threadwise_direct_convolution_1(