adding implicit gemm v3

[ROCm/composable_kernel commit: 1cc683a3a3]
This commit is contained in:
Chao Liu
2019-05-23 22:10:40 -05:00
parent 979dc4da2e
commit 39b829b919
16 changed files with 347 additions and 95 deletions

View File

@@ -80,8 +80,10 @@ __device__ void threadwise_direct_convolution_2(InDesc,
constexpr auto wei_desc = WeiDesc{};
constexpr auto out_desc = OutDesc{};
-constexpr auto in_reg_desc = make_ConstantTensorDescriptor(in_desc.GetLengths());
-constexpr auto wei_reg_desc = make_ConstantTensorDescriptor(wei_desc.GetLengths());
+constexpr auto in_reg_desc =
+make_ConstantTensorDescriptor_default_rank_packed(in_desc.GetLengths());
+constexpr auto wei_reg_desc =
+make_ConstantTensorDescriptor_default_rank_packed(wei_desc.GetLengths());
// register
TInWei p_in_reg[in_reg_desc.GetElementSpace()];