initial cuda run

This commit is contained in:
Chao Liu
2018-10-22 20:51:26 -05:00
parent 2f2cf35bf4
commit 49ceb0fe04
2 changed files with 15 additions and 17 deletions

View File

@@ -14,26 +14,24 @@ struct DeviceTensorDescriptor
checkCudaErrors(cudaMalloc(&mpLengths, data_sz * mDim));
checkCudaErrors(cudaMalloc(&mpStrides, data_sz * mDim));
checkCudaErrors(
cudaMemcpy(const_cast<void*>(static_cast<const void*>(host_desc.GetLengths().data())),
mpLengths,
data_sz * mDim,
cudaMemcpyHostToDevice));
checkCudaErrors(
cudaMemcpy(const_cast<void*>(static_cast<const void*>(host_desc.GetStrides().data())),
mpStrides,
data_sz * mDim,
cudaMemcpyHostToDevice));
checkCudaErrors(cudaMemcpy(
mpLengths, host_desc.GetLengths().data(), data_sz * mDim, cudaMemcpyHostToDevice));
checkCudaErrors(cudaMemcpy(
mpStrides, host_desc.GetStrides().data(), data_sz * mDim, cudaMemcpyHostToDevice));
}
__host__ ~DeviceTensorDescriptor()
{
checkCudaErrors(cudaFree(mpLengths));
checkCudaErrors(cudaFree(mpStrides));
#if 0
if(mpLengths != nullptr)
checkCudaErrors(cudaFree(mpLengths));
if(mpStrides != nullptr)
checkCudaErrors(cudaFree(mpStrides));
#endif
}
DataType_t mDataType;
unsigned long mDim;
unsigned long* mpLengths;
unsigned long* mpStrides;
unsigned long* mpLengths = nullptr;
unsigned long* mpStrides = nullptr;
};

View File

@@ -3,10 +3,10 @@
template <class TFloat, int NBlockDim>
__global__ void direct_convolution(DeviceTensorDescriptor in_desc,
TFloat* const in,
TFloat* const p_in,
DeviceTensorDescriptor wei_desc,
TFloat* const wei,
TFloat* const p_wei,
DeviceTensorDescriptor out_desc,
TFloat* out)
TFloat* p_out)
{
}