initial cuda build

[ROCm/composable_kernel commit: 2f2cf35bf4]
This commit is contained in:
Chao Liu
2018-10-22 11:51:10 -05:00
parent 6521ccba67
commit 6a5c465ad9
7 changed files with 179 additions and 82 deletions

View File

@@ -0,0 +1,12 @@
#pragma once
#include "device_tensor.cuh"
// direct_convolution: __global__ kernel entry point, templated on an element
// type (TFloat) and a compile-time integer (NBlockDim).
//
// The arguments come as three (descriptor, pointer) pairs; each descriptor is
// passed by value:
//   in_desc,  in   - presumably the input tensor layout and its data
//   wei_desc, wei  - presumably the weight/filter tensor layout and its data
//   out_desc, out  - presumably the output tensor layout and its data
// NOTE(review): the roles above are inferred from the parameter names only;
// DeviceTensorDescriptor is not defined in this file (presumably it comes from
// device_tensor.cuh) - confirm against that header and the call site.
// NOTE(review): `in` and `wei` are `TFloat* const` (constant pointer, mutable
// pointee). If they are meant to be read-only inputs, `const TFloat*` would
// state that; left unchanged here to keep the signature identical.
//
// The body is empty, so launching this kernel performs no reads or writes.
template <typename TFloat, int NBlockDim>
__global__ void direct_convolution(DeviceTensorDescriptor in_desc,
                                   TFloat* const in,
                                   DeviceTensorDescriptor wei_desc,
                                   TFloat* const wei,
                                   DeviceTensorDescriptor out_desc,
                                   TFloat* out)
{
    // No implementation yet.
}