From d2a488ddeca43e1b6bf6f1c3ceb4abf067c49962 Mon Sep 17 00:00:00 2001 From: Chao Liu Date: Tue, 20 Nov 2018 10:34:16 -0600 Subject: [PATCH] hand tuned params --- driver/conv.cu | 4 ++-- driver/device_direct_convolution_3.cuh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/driver/conv.cu b/driver/conv.cu index 5c899e00c9..2704db43f6 100644 --- a/driver/conv.cu +++ b/driver/conv.cu @@ -170,7 +170,7 @@ int main() int num_thread = std::thread::hardware_concurrency(); -#if 1 +#if 0 in.GenerateTensorValue(GeneratorTensor{}, num_thread); wei.GenerateTensorValue(GeneratorTensor{}, num_thread); #endif @@ -180,7 +180,7 @@ int main() device_convolution(in_desc, in, wei_desc, wei, out_desc, out_device); } -#if 1 +#if 0 host_convolution(in, wei, out_host); float error = 0; diff --git a/driver/device_direct_convolution_3.cuh b/driver/device_direct_convolution_3.cuh index b449c3e857..2be9201f84 100644 --- a/driver/device_direct_convolution_3.cuh +++ b/driver/device_direct_convolution_3.cuh @@ -27,9 +27,9 @@ void device_convolution( constexpr unsigned OutTileSizeH = 2; constexpr unsigned OutTileSizeW = 2; constexpr unsigned NPerBlock = 2; - constexpr unsigned KPerBlock = 8; + constexpr unsigned KPerBlock = 32; constexpr unsigned CPerBlock = 2; - constexpr unsigned YPerBlock = 4; + constexpr unsigned YPerBlock = 1; constexpr unsigned XPerBlock = 16; constexpr unsigned NPerThread = 2;