adding implicit gemm

This commit is contained in:
Chao Liu
2019-01-16 02:44:10 -06:00
parent 913afaeb5d
commit 89ee259752
6 changed files with 44 additions and 39 deletions

View File

@@ -1,5 +1,5 @@
#pragma once
//#include "gridwise_implicit_gemm_convolution_nchw_kcsr.cuh"
#include "gridwise_implicit_gemm_convolution_nchw_kcsr.cuh"
#include "gridwise_implicit_gemm_convolution_nchw_srck.cuh"
template <class T, class InDesc, class WeiDesc, class OutDesc>
@@ -26,14 +26,13 @@ void device_implicit_gemm_convolution(
constexpr auto wei_desc = WeiDesc{};
constexpr auto out_desc = OutDesc{};
#if 1
#if 0
constexpr unsigned NPerBlock = 1;
constexpr unsigned KPerBlock = 1;
constexpr unsigned CPerBlock = 1;
constexpr unsigned HoPerBlock = 2;
constexpr unsigned WoPerBlock = 32;
constexpr unsigned NPerThread = 1;
constexpr unsigned KPerThread = 1;
constexpr unsigned CPerThread = 1;
constexpr unsigned HoPerThread = 2;
@@ -47,13 +46,25 @@ void device_implicit_gemm_convolution(
constexpr unsigned HoPerBlock = 2;
constexpr unsigned WoPerBlock = 32;
constexpr unsigned NPerThread = 2;
constexpr unsigned KPerThread = 4;
constexpr unsigned CPerThread = 2;
constexpr unsigned HoPerThread = 2;
constexpr unsigned WoPerThread = 2;
constexpr unsigned BlockSize = 128;
#elif 1
constexpr unsigned NPerBlock = 2;
constexpr unsigned KPerBlock = 64;
constexpr unsigned CPerBlock = 4;
constexpr unsigned HoPerBlock = 2;
constexpr unsigned WoPerBlock = 32;
constexpr unsigned KPerThread = 4;
constexpr unsigned CPerThread = 1;
constexpr unsigned HoPerThread = 2;
constexpr unsigned WoPerThread = 2;
constexpr unsigned BlockSize = 256;
#endif
constexpr unsigned GridSize =