From 49ceb0fe041cbe62ce6016d917bbc55e9645e3d9 Mon Sep 17 00:00:00 2001 From: Chao Liu Date: Mon, 22 Oct 2018 20:51:26 -0500 Subject: [PATCH] initial cuda run --- src/include/device_tensor.cuh | 26 ++++++++++++-------------- src/include/direct_convolution.cuh | 6 +++--- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/include/device_tensor.cuh b/src/include/device_tensor.cuh index 87865b53ad..56338452f6 100644 --- a/src/include/device_tensor.cuh +++ b/src/include/device_tensor.cuh @@ -14,26 +14,24 @@ struct DeviceTensorDescriptor checkCudaErrors(cudaMalloc(&mpLengths, data_sz * mDim)); checkCudaErrors(cudaMalloc(&mpStrides, data_sz * mDim)); - checkCudaErrors( - cudaMemcpy(const_cast(static_cast(host_desc.GetLengths().data())), - mpLengths, - data_sz * mDim, - cudaMemcpyHostToDevice)); - checkCudaErrors( - cudaMemcpy(const_cast(static_cast(host_desc.GetStrides().data())), - mpStrides, - data_sz * mDim, - cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy( + mpLengths, host_desc.GetLengths().data(), data_sz * mDim, cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy( + mpStrides, host_desc.GetStrides().data(), data_sz * mDim, cudaMemcpyHostToDevice)); } __host__ ~DeviceTensorDescriptor() { - checkCudaErrors(cudaFree(mpLengths)); - checkCudaErrors(cudaFree(mpStrides)); +#if 0 + if(mpLengths != nullptr) + checkCudaErrors(cudaFree(mpLengths)); + if(mpStrides != nullptr) + checkCudaErrors(cudaFree(mpStrides)); +#endif } DataType_t mDataType; unsigned long mDim; - unsigned long* mpLengths; - unsigned long* mpStrides; + unsigned long* mpLengths = nullptr; + unsigned long* mpStrides = nullptr; }; diff --git a/src/include/direct_convolution.cuh b/src/include/direct_convolution.cuh index 2816028ac8..8315dcbb55 100644 --- a/src/include/direct_convolution.cuh +++ b/src/include/direct_convolution.cuh @@ -3,10 +3,10 @@ template __global__ void direct_convolution(DeviceTensorDescriptor in_desc, - TFloat* const in, + TFloat* const p_in, DeviceTensorDescriptor wei_desc, - TFloat* const wei, + TFloat* const p_wei, DeviceTensorDescriptor out_desc, - TFloat* out) + TFloat* p_out) { }