improved blockwise_tensor_op

This commit is contained in:
Chao Liu
2018-11-14 08:55:45 -06:00
parent ff31af2227
commit 1812666a47
5 changed files with 472 additions and 154 deletions

View File

@@ -26,7 +26,7 @@ struct GeneratorTensor
T operator()(Is... is)
{
#if 1
-return std::rand() / RAND_MAX;
+return T(std::rand()) / T(RAND_MAX);
#elif 0
std::initializer_list<std::size_t> ls = {static_cast<std::size_t>(is)...};
@@ -142,8 +142,8 @@ void device_convolution(
constexpr unsigned NBlockCopyLen0 = 1;
constexpr unsigned NBlockCopyLen1 = 1;
-constexpr unsigned NBlockCopyLen2 = 2;
-constexpr unsigned NBlockCopyLen3 = 16;
+constexpr unsigned NBlockCopyLen2 = 4;
+constexpr unsigned NBlockCopyLen3 = 32;
constexpr unsigned BlockSize = 128;