Add implicit GEMM v3

This commit is contained in:
Chao Liu
2019-05-15 09:58:17 -05:00
parent 4957d5a399
commit b7d052459d
29 changed files with 977 additions and 296 deletions

View File

@@ -13,6 +13,7 @@
#include "device_convolution_implicit_gemm_v1_nchw_cyxk_khwn.hpp"
#include "device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp"
#include "device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp"
#include "device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp"
struct GeneratorTensor_1
{
@@ -410,7 +411,7 @@ void check_error(const Tensor<T>& ref, const Tensor<T>& result)
int main(int argc, char* argv[])
{
#if 1
#if 0
// 3x3, 34x34
constexpr index_t N = 64;
constexpr index_t C = 256;
@@ -434,7 +435,7 @@ int main(int argc, char* argv[])
constexpr index_t HPad = 0;
constexpr index_t WPad = 0;
#elif 0
#elif 1
// 3x3 filter, 28x28 image
constexpr index_t N = 128;
constexpr index_t C = 256;
@@ -603,7 +604,7 @@ int main(int argc, char* argv[])
#if 1
#if 0
device_direct_convolution_1
#elif 1
#elif 0
device_convolution_direct_v2_nchw_kcyx_nkhw
#elif 0
device_direct_convolution_2_vectorized_nchw_kcyx_nkhw
@@ -615,6 +616,8 @@ int main(int argc, char* argv[])
device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw
#elif 0
device_convolution_implicit_gemm_v2_chwn_cyxk_khwn
#elif 1
device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw
#endif
(in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);