mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-11 17:00:18 +00:00
hip build
This commit is contained in:
@@ -8,11 +8,11 @@
|
||||
#include "ConstantTensorDescriptor.hip.hpp"
|
||||
#include "conv_common.hip.hpp"
|
||||
//#include "device_direct_convolution_1.hpp"
|
||||
#include "device_direct_convolution_2_nchw_kcyx_nkhw.hpp"
|
||||
#include "device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp"
|
||||
//#include "device_implicit_gemm_convolution_1_chwn_cyxk_khwn.hpp"
|
||||
//#include "device_direct_convolution_2_nchw_kcyx_nkhw.hpp"
|
||||
//#include "device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp"
|
||||
#include "device_implicit_gemm_convolution_1_chwn_cyxk_khwn.hpp"
|
||||
//#include "device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp"
|
||||
//#include "device_implicit_gemm_convolution_2_chwn_cyxk_khwn.hpp"
|
||||
#include "device_implicit_gemm_convolution_2_chwn_cyxk_khwn.hpp"
|
||||
|
||||
struct GeneratorTensor_1
|
||||
{
|
||||
@@ -353,7 +353,7 @@ void host_winograd_3x3_convolution(const Tensor<TIn>& in_nchw,
|
||||
std::size_t ho = HoPerTile * htile + j;
|
||||
for(int i = 0; i < WoPerTile; ++i)
|
||||
{
|
||||
std::size_t wo = WoPerTile * wtile + i;
|
||||
std::size_t wo = WoPerTile * wtile + i;
|
||||
out_nkhw(n, k, ho, wo) = out_hold(n, k, htile, wtile, j, i);
|
||||
}
|
||||
}
|
||||
@@ -568,7 +568,7 @@ int main(int argc, char* argv[])
|
||||
|
||||
constexpr unsigned HPad = 2;
|
||||
constexpr unsigned WPad = 2;
|
||||
#elif 1
|
||||
#elif 0
|
||||
// 1x1 filter, 32x32 image
|
||||
constexpr unsigned N = 64;
|
||||
constexpr unsigned C = 256;
|
||||
@@ -578,6 +578,18 @@ int main(int argc, char* argv[])
|
||||
constexpr unsigned Y = 1;
|
||||
constexpr unsigned X = 1;
|
||||
|
||||
constexpr unsigned HPad = 0;
|
||||
constexpr unsigned WPad = 0;
|
||||
#elif 1
|
||||
// 1x1 filter, 14x14 image
|
||||
constexpr unsigned N = 128;
|
||||
constexpr unsigned C = 2048;
|
||||
constexpr unsigned HI = 14;
|
||||
constexpr unsigned WI = 14;
|
||||
constexpr unsigned K = 512;
|
||||
constexpr unsigned Y = 1;
|
||||
constexpr unsigned X = 1;
|
||||
|
||||
constexpr unsigned HPad = 0;
|
||||
constexpr unsigned WPad = 0;
|
||||
#endif
|
||||
@@ -594,8 +606,8 @@ int main(int argc, char* argv[])
|
||||
ostream_ConstantTensorDescriptor(wei_kcyx_desc, std::cout << "wei_kcyx_desc: ");
|
||||
ostream_ConstantTensorDescriptor(out_nkhw_desc, std::cout << "out_nkhw_desc: ");
|
||||
|
||||
using in_data_t = char;
|
||||
using out_data_t = int32_t;
|
||||
using in_data_t = float;
|
||||
using out_data_t = float;
|
||||
Tensor<in_data_t> in_nchw(make_TensorDescriptor(in_nchw_desc));
|
||||
Tensor<in_data_t> wei_kcyx(make_TensorDescriptor(wei_kcyx_desc));
|
||||
Tensor<out_data_t> out_nkhw_host(make_TensorDescriptor(out_nkhw_desc));
|
||||
@@ -635,9 +647,9 @@ int main(int argc, char* argv[])
|
||||
device_direct_convolution_1
|
||||
#elif 0
|
||||
device_direct_convolution_2_nchw_kcyx_nkhw
|
||||
#elif 1
|
||||
device_direct_convolution_2_vectorized_nchw_kcyx_nkhw
|
||||
#elif 0
|
||||
device_direct_convolution_2_vectorized_nchw_kcyx_nkhw
|
||||
#elif 1
|
||||
device_implicit_gemm_convolution_1_chwn_cyxk_khwn
|
||||
#elif 0
|
||||
device_implicit_gemm_convolution_2_chwn_cyxk_khwn
|
||||
|
||||
Reference in New Issue
Block a user