hand tuned params

This commit is contained in:
Chao Liu
2018-11-20 10:34:16 -06:00
parent c587726190
commit d2a488ddec
2 changed files with 4 additions and 4 deletions

View File

@@ -170,7 +170,7 @@ int main()
int num_thread = std::thread::hardware_concurrency();
-#if 1
+#if 0
in.GenerateTensorValue(GeneratorTensor<float>{}, num_thread);
wei.GenerateTensorValue(GeneratorTensor<float>{}, num_thread);
#endif
@@ -180,7 +180,7 @@ int main()
device_convolution(in_desc, in, wei_desc, wei, out_desc, out_device);
}
-#if 1
+#if 0
host_convolution(in, wei, out_host);
float error = 0;

View File

@@ -27,9 +27,9 @@ void device_convolution(
constexpr unsigned OutTileSizeH = 2;
constexpr unsigned OutTileSizeW = 2;
constexpr unsigned NPerBlock = 2;
-constexpr unsigned KPerBlock = 8;
+constexpr unsigned KPerBlock = 32;
constexpr unsigned CPerBlock = 2;
-constexpr unsigned YPerBlock = 4;
+constexpr unsigned YPerBlock = 1;
constexpr unsigned XPerBlock = 16;
constexpr unsigned NPerThread = 2;