This commit is contained in:
Chao Liu
2018-11-28 16:20:01 -06:00
parent fee92fb636
commit 1eafc9c1fb
4 changed files with 27 additions and 22 deletions

View File

@@ -302,7 +302,7 @@ template <class T>
void check_error(const Tensor<T>& ref, const Tensor<T>& result)
{
float error = 0;
float max_diff = 0;
float max_diff = -1;
float ref_value = 0, result_value = 0;
for(int i = 0; i < ref.mData.size(); ++i)
{
@@ -338,6 +338,14 @@ int main()
constexpr unsigned K = 64;
constexpr unsigned S = 3;
constexpr unsigned R = 3;
#elif 0
constexpr unsigned N = 72;
constexpr unsigned C = 288;
constexpr unsigned HI = 38;
constexpr unsigned WI = 38;
constexpr unsigned K = 72;
constexpr unsigned S = 3;
constexpr unsigned R = 3;
#elif 0
constexpr unsigned N = 1;
constexpr unsigned C = 1;
@@ -347,13 +355,13 @@ int main()
constexpr unsigned S = 3;
constexpr unsigned R = 3;
#elif 0
constexpr unsigned N = 1;
constexpr unsigned C = 1;
constexpr unsigned N = 1;
constexpr unsigned C = 1;
constexpr unsigned HI = 4;
constexpr unsigned WI = 4;
constexpr unsigned K = 1;
constexpr unsigned S = 3;
constexpr unsigned R = 3;
constexpr unsigned K = 1;
constexpr unsigned S = 3;
constexpr unsigned R = 3;
#elif 0
constexpr unsigned N = 2;
constexpr unsigned C = 3;

View File

@@ -26,13 +26,13 @@ void device_direct_convolution_1(
constexpr auto out_desc = OutDesc{};
constexpr unsigned OutTileSizeH = 2;
constexpr unsigned OutTileSizeW = 2;
constexpr unsigned NPerBlock = 1;
constexpr unsigned NPerBlock = 2;
constexpr unsigned KPerBlock = 16;
constexpr unsigned CPerBlock = 4;
constexpr unsigned YPerBlock = 4;
constexpr unsigned CPerBlock = 2;
constexpr unsigned YPerBlock = 2;
constexpr unsigned XPerBlock = 16;
constexpr unsigned NPerThread = 1;
constexpr unsigned NPerThread = 2;
constexpr unsigned KPerThread = 4;
constexpr unsigned CPerThread = 2;