From cb8367c0eb227824816a9aeaa3c7370bf628a26a Mon Sep 17 00:00:00 2001 From: Chao Liu Date: Tue, 20 Nov 2018 10:43:37 -0600 Subject: [PATCH] rename [ROCm/composable_kernel commit: 6790b8f3cc4fd32a9d9a43c6c9d80b826d969980] --- driver/conv.cu | 5 +++-- ...on_3.cuh => device_direct_convolution_1.cuh} | 17 +++++------------ driver/device_direct_convolution_2.cuh | 17 ++++++++++++----- ...ion.cuh => blockwise_direct_convolution.cuh} | 2 +- ..._2.cuh => gridwise_direct_convolution_1.cuh} | 2 +- ..._3.cuh => gridwise_direct_convolution_2.cuh} | 4 ++-- ...on.cuh => threadwise_direct_convolution.cuh} | 0 7 files changed, 24 insertions(+), 23 deletions(-) rename driver/{device_direct_convolution_3.cuh => device_direct_convolution_1.cuh} (88%) rename src/include/{blockwise_convolution.cuh => blockwise_direct_convolution.cuh} (99%) rename src/include/{direct_convolution_2.cuh => gridwise_direct_convolution_1.cuh} (99%) rename src/include/{direct_convolution_3.cuh => gridwise_direct_convolution_2.cuh} (99%) rename src/include/{threadwise_convolution.cuh => threadwise_direct_convolution.cuh} (100%) diff --git a/driver/conv.cu b/driver/conv.cu index 2704db43f6..966f94dfc7 100644 --- a/driver/conv.cu +++ b/driver/conv.cu @@ -5,7 +5,8 @@ #include "nvToolsExt.h" #include "tensor.hpp" #include "constant_tensor_descriptor.cuh" -#include "device_direct_convolution_3.cuh" +#include "device_direct_convolution_1.cuh" +#include "device_direct_convolution_2.cuh" template struct GeneratorConstant @@ -177,7 +178,7 @@ int main() for(int i = 0; i < 20; ++i) { - device_convolution(in_desc, in, wei_desc, wei, out_desc, out_device); + device_direct_convolution_2(in_desc, in, wei_desc, wei, out_desc, out_device); } #if 0 diff --git a/driver/device_direct_convolution_3.cuh b/driver/device_direct_convolution_1.cuh similarity index 88% rename from driver/device_direct_convolution_3.cuh rename to driver/device_direct_convolution_1.cuh index 2be9201f84..7a0fc29fbe 100644 --- 
a/driver/device_direct_convolution_3.cuh +++ b/driver/device_direct_convolution_1.cuh @@ -1,8 +1,8 @@ #pragma once -#include "direct_convolution_3.cuh" +#include "gridwise_direct_convolution_1.cuh" template <class T, class InDesc, class WeiDesc, class OutDesc> -void device_convolution( +void device_direct_convolution_1( InDesc, const Tensor<T>& in, WeiDesc, const Tensor<T>& wei, OutDesc, Tensor<T>& out) { std::size_t data_sz = sizeof(T); @@ -26,16 +26,12 @@ void device_convolution( constexpr auto out_desc = OutDesc{}; constexpr unsigned OutTileSizeH = 2; constexpr unsigned OutTileSizeW = 2; - constexpr unsigned NPerBlock = 2; - constexpr unsigned KPerBlock = 32; + constexpr unsigned NPerBlock = 1; + constexpr unsigned KPerBlock = 4; constexpr unsigned CPerBlock = 2; - constexpr unsigned YPerBlock = 1; + constexpr unsigned YPerBlock = 8; constexpr unsigned XPerBlock = 16; - constexpr unsigned NPerThread = 2; - constexpr unsigned KPerThread = 4; - constexpr unsigned CPerThread = 2; - constexpr unsigned NBlockOpLen0 = 1; constexpr unsigned NBlockOpLen1 = 1; constexpr unsigned NBlockOpLen2 = 4; @@ -70,9 +66,6 @@ void device_convolution( CPerBlock, YPerBlock, XPerBlock, - NPerThread, - KPerThread, - CPerThread, NBlockOpLen0, NBlockOpLen1, NBlockOpLen2, diff --git a/driver/device_direct_convolution_2.cuh b/driver/device_direct_convolution_2.cuh index 7fa644bdf5..12c27b36cb 100644 --- a/driver/device_direct_convolution_2.cuh +++ b/driver/device_direct_convolution_2.cuh @@ -1,8 +1,8 @@ #pragma once -#include "direct_convolution_2.cuh" +#include "gridwise_direct_convolution_2.cuh" template <class T, class InDesc, class WeiDesc, class OutDesc> -void device_convolution( +void device_direct_convolution_2( InDesc, const Tensor<T>& in, WeiDesc, const Tensor<T>& wei, OutDesc, Tensor<T>& out) { std::size_t data_sz = sizeof(T); @@ -26,12 +26,16 @@ void device_convolution( constexpr auto out_desc = OutDesc{}; constexpr unsigned OutTileSizeH = 2; constexpr unsigned OutTileSizeW = 2; - constexpr unsigned NPerBlock = 1; - constexpr unsigned KPerBlock = 4; + constexpr unsigned NPerBlock = 2; + constexpr 
unsigned KPerBlock = 32; constexpr unsigned CPerBlock = 2; - constexpr unsigned YPerBlock = 8; + constexpr unsigned YPerBlock = 1; constexpr unsigned XPerBlock = 16; + constexpr unsigned NPerThread = 2; + constexpr unsigned KPerThread = 4; + constexpr unsigned CPerThread = 2; + constexpr unsigned NBlockOpLen0 = 1; constexpr unsigned NBlockOpLen1 = 1; constexpr unsigned NBlockOpLen2 = 4; @@ -66,6 +70,9 @@ void device_convolution( CPerBlock, YPerBlock, XPerBlock, + NPerThread, + KPerThread, + CPerThread, NBlockOpLen0, NBlockOpLen1, NBlockOpLen2, diff --git a/src/include/blockwise_convolution.cuh b/src/include/blockwise_direct_convolution.cuh similarity index 99% rename from src/include/blockwise_convolution.cuh rename to src/include/blockwise_direct_convolution.cuh index dd3e56a3a4..40a6be6aac 100644 --- a/src/include/blockwise_convolution.cuh +++ b/src/include/blockwise_direct_convolution.cuh @@ -1,7 +1,7 @@ #pragma once #include "constant_tensor_descriptor.cuh" #include "threadwise_tensor_op.cuh" -#include "threadwise_convolution.cuh" +#include "threadwise_direct_convolution.cuh" template