mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 17:26:00 +00:00
13 lines
444 B
Plaintext
13 lines
444 B
Plaintext
#pragma once
|
|
#include "device_tensor.cuh"
|
|
|
|
// direct_convolution: kernel entry point whose body is currently empty.
//
// As written, a launch of this kernel reads nothing and writes nothing; none
// of the arguments are dereferenced or otherwise used.
//
// Template parameters:
//   TFloat    - element type pointed to by `in`, `wei` and `out`.
//   NBlockDim - integer parameter that is not referenced in the signature or
//               the body. NOTE(review): presumably the thread-block size the
//               kernel is meant to be launched with -- TODO confirm.
//
// Arguments (each descriptor is passed by value):
//   in_desc,  in  - descriptor plus data pointer; the pointer itself is const,
//                   the pointed-to elements are not.
//   wei_desc, wei - same shape of declaration as the (in_desc, in) pair.
//   out_desc, out - descriptor plus a non-const data pointer.
//
// NOTE(review): going by the names alone these look like the input, weight
// and output tensors of a convolution -- verify against device_tensor.cuh and
// the call sites before relying on that.
template <class TFloat, int NBlockDim>
__global__ void
direct_convolution(DeviceTensorDescriptor in_desc, TFloat* const in,
                   DeviceTensorDescriptor wei_desc, TFloat* const wei,
                   DeviceTensorDescriptor out_desc, TFloat* out)
{
    // No statements: the kernel performs no work in its current form.
}
|