mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-11 17:00:18 +00:00
* Turning compare warnings on * Cleaning part I * Cleaning part II * Explicit static_cast to ck::type_convert * Resolving large tensor size issue. * format * revert change to tensor descriptor; promote ElementSpaceSize to 64bit * use integer value for GEMM test * Review remarks * Review remarks + issues with (un)signed arithmetic * Format fix * Format * Clang-format. * fix 2gb limit issue Co-authored-by: Chao Liu <chao.liu2@amd.com> Co-authored-by: Adam Osewski <aosewski@amd.com>
This commit is contained in:
@@ -211,7 +211,8 @@ struct ReductionHost
|
||||
AccDataType accuVal = ReduceOpZeroVal<AccDataType, ReduceOpId>();
|
||||
IndexDataType accuIndex = 0;
|
||||
|
||||
for(IndexDataType i = 0; i < reduce_dim_indexes.size(); i++)
|
||||
for(IndexDataType i = 0; i < ck::type_convert<IndexDataType>(reduce_dim_indexes.size());
|
||||
i++)
|
||||
{
|
||||
auto offset_reduce =
|
||||
get_offset_from_index<NumReduceDim>(reduceStrides, reduce_dim_indexes[i]);
|
||||
@@ -246,7 +247,9 @@ struct ReductionHost
|
||||
auto offset_invariant =
|
||||
get_offset_from_index<NumInvariantDim>(invariantStrides, invariant_index);
|
||||
|
||||
for(IndexDataType i = 0; i < reduce_dim_indexes.size(); i++)
|
||||
for(IndexDataType i = 0;
|
||||
i < ck::type_convert<IndexDataType>(reduce_dim_indexes.size());
|
||||
i++)
|
||||
{
|
||||
auto offset_reduce =
|
||||
get_offset_from_index<NumReduceDim>(reduceStrides, reduce_dim_indexes[i]);
|
||||
|
||||
@@ -154,7 +154,7 @@ struct ParallelTensorFunctor
|
||||
{
|
||||
std::array<std::size_t, NDIM> indices;
|
||||
|
||||
for(int idim = 0; idim < NDIM; ++idim)
|
||||
for(std::size_t idim = 0; idim < NDIM; ++idim)
|
||||
{
|
||||
indices[idim] = i / mStrides[idim];
|
||||
i -= indices[idim] * mStrides[idim];
|
||||
@@ -316,7 +316,7 @@ float check_error(const Tensor<T>& ref, const Tensor<T>& result)
|
||||
|
||||
constexpr float eps = 1e-10;
|
||||
|
||||
for(int i = 0; i < ref.mData.size(); ++i)
|
||||
for(std::size_t i = 0; i < ref.mData.size(); ++i)
|
||||
{
|
||||
float ref_v = ck::type_convert<float>(ref.mData[i]);
|
||||
float result_v = ck::type_convert<float>(result.mData[i]);
|
||||
|
||||
@@ -70,18 +70,25 @@ struct ReferenceConvBwdWeight : public device::BaseOperator
|
||||
constexpr auto I1 = Number<1>{};
|
||||
auto f_kcyx = [&](auto k, auto c, auto y, auto x) {
|
||||
float v_acc = 0;
|
||||
for(int n = 0; n < arg.out_n_k_ho_wo_.mDesc.GetLengths()[0]; ++n)
|
||||
for(std::size_t n = 0; n < arg.out_n_k_ho_wo_.mDesc.GetLengths()[0]; ++n)
|
||||
{
|
||||
for(int ho = 0; ho < arg.out_n_k_ho_wo_.mDesc.GetLengths()[2]; ++ho)
|
||||
for(std::size_t ho = 0; ho < arg.out_n_k_ho_wo_.mDesc.GetLengths()[2]; ++ho)
|
||||
{
|
||||
int hi = ho * arg.conv_strides_[I0] + y * arg.conv_dilations_[I0] -
|
||||
arg.in_left_pads_[I0];
|
||||
for(int wo = 0; wo < arg.out_n_k_ho_wo_.mDesc.GetLengths()[3]; ++wo)
|
||||
auto hi = ck::type_convert<ck::long_index_t>(ho * arg.conv_strides_[I0]) +
|
||||
ck::type_convert<ck::long_index_t>(y * arg.conv_dilations_[I0]) -
|
||||
ck::type_convert<ck::long_index_t>(arg.in_left_pads_[I0]);
|
||||
for(std::size_t wo = 0; wo < arg.out_n_k_ho_wo_.mDesc.GetLengths()[3]; ++wo)
|
||||
{
|
||||
int wi = wo * arg.conv_strides_[I1] + x * arg.conv_dilations_[I1] -
|
||||
arg.in_left_pads_[I1];
|
||||
if(hi >= 0 && hi < arg.in_n_c_hi_wi_.mDesc.GetLengths()[2] && wi >= 0 &&
|
||||
wi < arg.in_n_c_hi_wi_.mDesc.GetLengths()[3])
|
||||
auto wi =
|
||||
ck::type_convert<ck::long_index_t>(wo * arg.conv_strides_[I1]) +
|
||||
ck::type_convert<ck::long_index_t>(x * arg.conv_dilations_[I1]) -
|
||||
ck::type_convert<ck::long_index_t>(arg.in_left_pads_[I1]);
|
||||
if(hi >= 0 &&
|
||||
ck::type_convert<std::size_t>(hi) <
|
||||
arg.in_n_c_hi_wi_.mDesc.GetLengths()[2] &&
|
||||
wi >= 0 &&
|
||||
ck::type_convert<std::size_t>(wi) <
|
||||
arg.in_n_c_hi_wi_.mDesc.GetLengths()[3])
|
||||
{
|
||||
float v_out;
|
||||
float v_in;
|
||||
|
||||
@@ -78,15 +78,18 @@ struct ReferenceConvBwdData : public device::BaseOperator
|
||||
|
||||
AccDataType v_acc = 0;
|
||||
|
||||
for(int x = 0; x < X; ++x)
|
||||
for(std::size_t x = 0; x < X; ++x)
|
||||
{
|
||||
int w_tmp = wi + arg.in_left_pads_[0] - x * arg.conv_dilations_[0];
|
||||
auto w_tmp = ck::type_convert<ck::long_index_t>(wi) +
|
||||
ck::type_convert<ck::long_index_t>(arg.in_left_pads_[0]) -
|
||||
ck::type_convert<ck::long_index_t>(x * arg.conv_dilations_[0]);
|
||||
if(w_tmp % arg.conv_strides_[0] == 0)
|
||||
{
|
||||
int wo = w_tmp / arg.conv_strides_[0];
|
||||
if(wo >= 0 && wo < Wo)
|
||||
auto wo = ck::type_convert<ck::long_index_t>(w_tmp) /
|
||||
ck::type_convert<ck::long_index_t>(arg.conv_strides_[0]);
|
||||
if(wo >= 0 && ck::type_convert<std::size_t>(wo) < Wo)
|
||||
{
|
||||
for(int k = 0; k < K; ++k)
|
||||
for(std::size_t k = 0; k < K; ++k)
|
||||
{
|
||||
AccDataType v_out = 0;
|
||||
AccDataType v_wei = 0;
|
||||
@@ -128,24 +131,32 @@ struct ReferenceConvBwdData : public device::BaseOperator
|
||||
|
||||
AccDataType v_acc = 0;
|
||||
|
||||
for(int y = 0; y < Y; ++y)
|
||||
for(std::size_t y = 0; y < Y; ++y)
|
||||
{
|
||||
int h_tmp = hi + arg.in_left_pads_[0] - y * arg.conv_dilations_[0];
|
||||
auto h_tmp = ck::type_convert<ck::long_index_t>(hi) +
|
||||
ck::type_convert<ck::long_index_t>(arg.in_left_pads_[0]) -
|
||||
ck::type_convert<ck::long_index_t>(y * arg.conv_dilations_[0]);
|
||||
if(h_tmp % arg.conv_strides_[0] == 0)
|
||||
{
|
||||
int ho = h_tmp / arg.conv_strides_[0];
|
||||
if(ho >= 0 && ho < Ho)
|
||||
auto ho = ck::type_convert<ck::long_index_t>(h_tmp) /
|
||||
ck::type_convert<ck::long_index_t>(arg.conv_strides_[0]);
|
||||
if(ho >= 0 && ck::type_convert<std::size_t>(ho) < Ho)
|
||||
{
|
||||
for(int x = 0; x < X; ++x)
|
||||
for(std::size_t x = 0; x < X; ++x)
|
||||
{
|
||||
int w_tmp =
|
||||
wi + arg.in_left_pads_[1] - x * arg.conv_dilations_[1];
|
||||
auto w_tmp =
|
||||
ck::type_convert<ck::long_index_t>(wi) +
|
||||
ck::type_convert<ck::long_index_t>(arg.in_left_pads_[1]) -
|
||||
ck::type_convert<ck::long_index_t>(x *
|
||||
arg.conv_dilations_[1]);
|
||||
if(w_tmp % arg.conv_strides_[1] == 0)
|
||||
{
|
||||
int wo = w_tmp / arg.conv_strides_[1];
|
||||
if(wo >= 0 && wo < Wo)
|
||||
auto wo = ck::type_convert<ck::long_index_t>(w_tmp) /
|
||||
ck::type_convert<ck::long_index_t>(
|
||||
arg.conv_strides_[1]);
|
||||
if(wo >= 0 && ck::type_convert<std::size_t>(wo) < Wo)
|
||||
{
|
||||
for(int k = 0; k < K; ++k)
|
||||
for(std::size_t k = 0; k < K; ++k)
|
||||
{
|
||||
AccDataType v_out = 0;
|
||||
AccDataType v_wei = 0;
|
||||
@@ -194,33 +205,49 @@ struct ReferenceConvBwdData : public device::BaseOperator
|
||||
|
||||
AccDataType v_acc = 0;
|
||||
|
||||
for(int z = 0; z < Z; ++z)
|
||||
for(std::size_t z = 0; z < Z; ++z)
|
||||
{
|
||||
int d_tmp = di + arg.in_left_pads_[0] - z * arg.conv_dilations_[0];
|
||||
auto d_tmp = ck::type_convert<ck::long_index_t>(di) +
|
||||
ck::type_convert<ck::long_index_t>(arg.in_left_pads_[0]) -
|
||||
ck::type_convert<ck::long_index_t>(z * arg.conv_dilations_[0]);
|
||||
if(d_tmp % arg.conv_strides_[0] == 0)
|
||||
{
|
||||
int do_ = d_tmp / arg.conv_strides_[0];
|
||||
if(do_ >= 0 && do_ < Do)
|
||||
auto do_ = ck::type_convert<ck::long_index_t>(d_tmp) /
|
||||
ck::type_convert<ck::long_index_t>(arg.conv_strides_[0]);
|
||||
if(do_ >= 0 && ck::type_convert<std::size_t>(do_) < Do)
|
||||
{
|
||||
for(int y = 0; y < Y; ++y)
|
||||
for(std::size_t y = 0; y < Y; ++y)
|
||||
{
|
||||
int h_tmp =
|
||||
hi + arg.in_left_pads_[1] - y * arg.conv_dilations_[1];
|
||||
auto h_tmp =
|
||||
ck::type_convert<ck::long_index_t>(hi) +
|
||||
ck::type_convert<ck::long_index_t>(arg.in_left_pads_[1]) -
|
||||
ck::type_convert<ck::long_index_t>(y *
|
||||
arg.conv_dilations_[1]);
|
||||
if(h_tmp % arg.conv_strides_[1] == 0)
|
||||
{
|
||||
int ho = h_tmp / arg.conv_strides_[1];
|
||||
if(ho >= 0 && ho < Ho)
|
||||
auto ho = ck::type_convert<ck::long_index_t>(h_tmp) /
|
||||
ck::type_convert<ck::long_index_t>(
|
||||
arg.conv_strides_[1]);
|
||||
if(ho >= 0 && ck::type_convert<std::size_t>(ho) < Ho)
|
||||
{
|
||||
for(int x = 0; x < X; ++x)
|
||||
for(std::size_t x = 0; x < X; ++x)
|
||||
{
|
||||
int w_tmp = wi + arg.in_left_pads_[2] -
|
||||
x * arg.conv_dilations_[2];
|
||||
auto w_tmp =
|
||||
ck::type_convert<ck::long_index_t>(wi) +
|
||||
ck::type_convert<ck::long_index_t>(
|
||||
arg.in_left_pads_[2]) -
|
||||
ck::type_convert<ck::long_index_t>(
|
||||
x * arg.conv_dilations_[2]);
|
||||
if(w_tmp % arg.conv_strides_[2] == 0)
|
||||
{
|
||||
int wo = w_tmp / arg.conv_strides_[2];
|
||||
if(wo >= 0 && wo < Wo)
|
||||
auto wo =
|
||||
ck::type_convert<ck::long_index_t>(w_tmp) /
|
||||
ck::type_convert<ck::long_index_t>(
|
||||
arg.conv_strides_[2]);
|
||||
if(wo >= 0 &&
|
||||
ck::type_convert<std::size_t>(wo) < Wo)
|
||||
{
|
||||
for(int k = 0; k < K; ++k)
|
||||
for(std::size_t k = 0; k < K; ++k)
|
||||
{
|
||||
AccDataType v_out = 0;
|
||||
AccDataType v_wei = 0;
|
||||
|
||||
@@ -88,13 +88,16 @@ struct ReferenceConvFwd : public device::BaseOperator
|
||||
auto f_ncw = [&](auto n, auto k, auto wo) {
|
||||
float v_acc = 0;
|
||||
|
||||
for(int c = 0; c < arg.weight_.mDesc.GetLengths()[1]; ++c)
|
||||
for(std::size_t c = 0; c < arg.weight_.mDesc.GetLengths()[1]; ++c)
|
||||
{
|
||||
for(int x = 0; x < arg.weight_.mDesc.GetLengths()[2]; ++x)
|
||||
for(std::size_t x = 0; x < arg.weight_.mDesc.GetLengths()[2]; ++x)
|
||||
{
|
||||
int wi = wo * arg.conv_strides_[0] + x * arg.conv_dilations_[0] -
|
||||
arg.in_left_pads_[0];
|
||||
if(wi >= 0 && wi < arg.input_.mDesc.GetLengths()[2])
|
||||
auto wi =
|
||||
ck::type_convert<ck::long_index_t>(wo * arg.conv_strides_[0]) +
|
||||
ck::type_convert<ck::long_index_t>(x * arg.conv_dilations_[0]) -
|
||||
ck::type_convert<ck::long_index_t>(arg.in_left_pads_[0]);
|
||||
if(wi >= 0 &&
|
||||
ck::type_convert<std::size_t>(wi) < arg.input_.mDesc.GetLengths()[2])
|
||||
{
|
||||
float v_in;
|
||||
float v_wei;
|
||||
@@ -128,18 +131,26 @@ struct ReferenceConvFwd : public device::BaseOperator
|
||||
auto f_nchw = [&](auto n, auto k, auto ho, auto wo) {
|
||||
float v_acc = 0;
|
||||
|
||||
for(int c = 0; c < arg.weight_.mDesc.GetLengths()[1]; ++c)
|
||||
for(std::size_t c = 0; c < arg.weight_.mDesc.GetLengths()[1]; ++c)
|
||||
{
|
||||
for(int y = 0; y < arg.weight_.mDesc.GetLengths()[2]; ++y)
|
||||
for(std::size_t y = 0; y < arg.weight_.mDesc.GetLengths()[2]; ++y)
|
||||
{
|
||||
int hi = ho * arg.conv_strides_[0] + y * arg.conv_dilations_[0] -
|
||||
arg.in_left_pads_[0];
|
||||
for(int x = 0; x < arg.weight_.mDesc.GetLengths()[3]; ++x)
|
||||
auto hi =
|
||||
ck::type_convert<ck::long_index_t>(ho * arg.conv_strides_[0]) +
|
||||
ck::type_convert<ck::long_index_t>(y * arg.conv_dilations_[0]) -
|
||||
ck::type_convert<ck::long_index_t>(arg.in_left_pads_[0]);
|
||||
for(std::size_t x = 0; x < arg.weight_.mDesc.GetLengths()[3]; ++x)
|
||||
{
|
||||
int wi = wo * arg.conv_strides_[1] + x * arg.conv_dilations_[1] -
|
||||
arg.in_left_pads_[1];
|
||||
if(hi >= 0 && hi < arg.input_.mDesc.GetLengths()[2] && wi >= 0 &&
|
||||
wi < arg.input_.mDesc.GetLengths()[3])
|
||||
auto wi =
|
||||
ck::type_convert<ck::long_index_t>(wo * arg.conv_strides_[1]) +
|
||||
ck::type_convert<ck::long_index_t>(x * arg.conv_dilations_[1]) -
|
||||
ck::type_convert<ck::long_index_t>(arg.in_left_pads_[1]);
|
||||
if(hi >= 0 &&
|
||||
ck::type_convert<std::size_t>(hi) <
|
||||
arg.input_.mDesc.GetLengths()[2] &&
|
||||
wi >= 0 &&
|
||||
ck::type_convert<std::size_t>(wi) <
|
||||
arg.input_.mDesc.GetLengths()[3])
|
||||
{
|
||||
float v_in;
|
||||
float v_wei;
|
||||
@@ -174,23 +185,37 @@ struct ReferenceConvFwd : public device::BaseOperator
|
||||
auto f_nchw = [&](auto n, auto k, auto d_o, auto ho, auto wo) {
|
||||
float v_acc = 0;
|
||||
|
||||
for(int c = 0; c < arg.weight_.mDesc.GetLengths()[1]; ++c)
|
||||
for(std::size_t c = 0; c < arg.weight_.mDesc.GetLengths()[1]; ++c)
|
||||
{
|
||||
for(int z = 0; z < arg.weight_.mDesc.GetLengths()[2]; ++z)
|
||||
for(std::size_t z = 0; z < arg.weight_.mDesc.GetLengths()[2]; ++z)
|
||||
{
|
||||
int di = d_o * arg.conv_strides_[0] + z * arg.conv_dilations_[0] -
|
||||
arg.in_left_pads_[0];
|
||||
for(int y = 0; y < arg.weight_.mDesc.GetLengths()[3]; ++y)
|
||||
auto di =
|
||||
ck::type_convert<ck::long_index_t>(d_o * arg.conv_strides_[0]) +
|
||||
ck::type_convert<ck::long_index_t>(z * arg.conv_dilations_[0]) -
|
||||
ck::type_convert<ck::long_index_t>(arg.in_left_pads_[0]);
|
||||
for(std::size_t y = 0; y < arg.weight_.mDesc.GetLengths()[3]; ++y)
|
||||
{
|
||||
int hi = ho * arg.conv_strides_[1] + y * arg.conv_dilations_[1] -
|
||||
arg.in_left_pads_[1];
|
||||
for(int x = 0; x < arg.weight_.mDesc.GetLengths()[4]; ++x)
|
||||
auto hi =
|
||||
ck::type_convert<ck::long_index_t>(ho * arg.conv_strides_[1]) +
|
||||
ck::type_convert<ck::long_index_t>(y * arg.conv_dilations_[1]) -
|
||||
ck::type_convert<ck::long_index_t>(arg.in_left_pads_[1]);
|
||||
for(std::size_t x = 0; x < arg.weight_.mDesc.GetLengths()[4]; ++x)
|
||||
{
|
||||
int wi = wo * arg.conv_strides_[2] +
|
||||
x * arg.conv_dilations_[2] - arg.in_left_pads_[2];
|
||||
if(di >= 0 && di < arg.input_.mDesc.GetLengths()[2] &&
|
||||
hi >= 0 && hi < arg.input_.mDesc.GetLengths()[3] &&
|
||||
wi >= 0 && wi < arg.input_.mDesc.GetLengths()[4])
|
||||
auto wi =
|
||||
ck::type_convert<ck::long_index_t>(wo *
|
||||
arg.conv_strides_[2]) +
|
||||
ck::type_convert<ck::long_index_t>(x *
|
||||
arg.conv_dilations_[2]) -
|
||||
ck::type_convert<ck::long_index_t>(arg.in_left_pads_[2]);
|
||||
if(di >= 0 &&
|
||||
ck::type_convert<std::size_t>(di) <
|
||||
arg.input_.mDesc.GetLengths()[2] &&
|
||||
hi >= 0 &&
|
||||
ck::type_convert<std::size_t>(hi) <
|
||||
arg.input_.mDesc.GetLengths()[3] &&
|
||||
wi >= 0 &&
|
||||
ck::type_convert<std::size_t>(wi) <
|
||||
arg.input_.mDesc.GetLengths()[4])
|
||||
{
|
||||
float v_in;
|
||||
float v_wei;
|
||||
|
||||
@@ -73,18 +73,25 @@ struct ReferenceConvFwd_Bias_Activation : public device::BaseOperator
|
||||
auto f_nchw = [&](auto n, auto k, auto ho, auto wo) {
|
||||
float v_acc = 0;
|
||||
|
||||
for(int c = 0; c < arg.wei_k_c_y_x_.mDesc.GetLengths()[1]; ++c)
|
||||
for(std::size_t c = 0; c < arg.wei_k_c_y_x_.mDesc.GetLengths()[1]; ++c)
|
||||
{
|
||||
for(int y = 0; y < arg.wei_k_c_y_x_.mDesc.GetLengths()[2]; ++y)
|
||||
for(std::size_t y = 0; y < arg.wei_k_c_y_x_.mDesc.GetLengths()[2]; ++y)
|
||||
{
|
||||
int hi = ho * arg.conv_strides_[0] + y * arg.conv_dilations_[0] -
|
||||
arg.in_left_pads_[0];
|
||||
for(int x = 0; x < arg.wei_k_c_y_x_.mDesc.GetLengths()[3]; ++x)
|
||||
auto hi = ck::type_convert<ck::long_index_t>(ho * arg.conv_strides_[0]) +
|
||||
ck::type_convert<ck::long_index_t>(y * arg.conv_dilations_[0]) -
|
||||
ck::type_convert<ck::long_index_t>(arg.in_left_pads_[0]);
|
||||
for(std::size_t x = 0; x < arg.wei_k_c_y_x_.mDesc.GetLengths()[3]; ++x)
|
||||
{
|
||||
int wi = wo * arg.conv_strides_[1] + x * arg.conv_dilations_[1] -
|
||||
arg.in_left_pads_[1];
|
||||
if(hi >= 0 && hi < arg.in_n_c_hi_wi_.mDesc.GetLengths()[2] && wi >= 0 &&
|
||||
wi < arg.in_n_c_hi_wi_.mDesc.GetLengths()[3])
|
||||
auto wi =
|
||||
ck::type_convert<ck::long_index_t>(wo * arg.conv_strides_[1]) +
|
||||
ck::type_convert<ck::long_index_t>(x * arg.conv_dilations_[1]) -
|
||||
ck::type_convert<ck::long_index_t>(arg.in_left_pads_[1]);
|
||||
if(hi >= 0 &&
|
||||
ck::type_convert<std::size_t>(hi) <
|
||||
arg.in_n_c_hi_wi_.mDesc.GetLengths()[2] &&
|
||||
wi >= 0 &&
|
||||
ck::type_convert<std::size_t>(wi) <
|
||||
arg.in_n_c_hi_wi_.mDesc.GetLengths()[3])
|
||||
{
|
||||
float v_in;
|
||||
float v_wei;
|
||||
|
||||
@@ -76,18 +76,25 @@ struct ReferenceConvFwd_Bias_Activation_Add : public device::BaseOperator
|
||||
auto f_nchw = [&](auto n, auto k, auto ho, auto wo) {
|
||||
float v_acc = 0;
|
||||
|
||||
for(int c = 0; c < arg.wei_k_c_y_x_.mDesc.GetLengths()[1]; ++c)
|
||||
for(std::size_t c = 0; c < arg.wei_k_c_y_x_.mDesc.GetLengths()[1]; ++c)
|
||||
{
|
||||
for(int y = 0; y < arg.wei_k_c_y_x_.mDesc.GetLengths()[2]; ++y)
|
||||
for(std::size_t y = 0; y < arg.wei_k_c_y_x_.mDesc.GetLengths()[2]; ++y)
|
||||
{
|
||||
int hi = ho * arg.conv_strides_[0] + y * arg.conv_dilations_[0] -
|
||||
arg.in_left_pads_[0];
|
||||
for(int x = 0; x < arg.wei_k_c_y_x_.mDesc.GetLengths()[3]; ++x)
|
||||
auto hi = ck::type_convert<ck::long_index_t>(ho * arg.conv_strides_[0]) +
|
||||
ck::type_convert<ck::long_index_t>(y * arg.conv_dilations_[0]) -
|
||||
ck::type_convert<ck::long_index_t>(arg.in_left_pads_[0]);
|
||||
for(std::size_t x = 0; x < arg.wei_k_c_y_x_.mDesc.GetLengths()[3]; ++x)
|
||||
{
|
||||
int wi = wo * arg.conv_strides_[1] + x * arg.conv_dilations_[1] -
|
||||
arg.in_left_pads_[1];
|
||||
if(hi >= 0 && hi < arg.in_n_c_hi_wi_.mDesc.GetLengths()[2] && wi >= 0 &&
|
||||
wi < arg.in_n_c_hi_wi_.mDesc.GetLengths()[3])
|
||||
auto wi =
|
||||
ck::type_convert<ck::long_index_t>(wo * arg.conv_strides_[1]) +
|
||||
ck::type_convert<ck::long_index_t>(x * arg.conv_dilations_[1]) -
|
||||
ck::type_convert<ck::long_index_t>(arg.in_left_pads_[1]);
|
||||
if(hi >= 0 &&
|
||||
ck::type_convert<std::size_t>(hi) <
|
||||
arg.in_n_c_hi_wi_.mDesc.GetLengths()[2] &&
|
||||
wi >= 0 &&
|
||||
ck::type_convert<std::size_t>(wi) <
|
||||
arg.in_n_c_hi_wi_.mDesc.GetLengths()[3])
|
||||
{
|
||||
float v_in;
|
||||
float v_wei;
|
||||
|
||||
@@ -25,7 +25,7 @@ std::size_t HostTensorDescriptor::GetElementSize() const
|
||||
std::size_t HostTensorDescriptor::GetElementSpace() const
|
||||
{
|
||||
std::size_t space = 1;
|
||||
for(int i = 0; i < mLens.size(); ++i)
|
||||
for(std::size_t i = 0; i < mLens.size(); ++i)
|
||||
{
|
||||
space += (mLens[i] - 1) * mStrides[i];
|
||||
}
|
||||
@@ -68,7 +68,7 @@ void ostream_HostTensorDescriptor(const HostTensorDescriptor& desc, std::ostream
|
||||
// FIXME: remove
|
||||
void bf16_to_f32_(const Tensor<ck::bhalf_t>& src, Tensor<float>& dst)
|
||||
{
|
||||
for(int i = 0; i < src.mData.size(); ++i)
|
||||
for(std::size_t i = 0; i < src.mData.size(); ++i)
|
||||
dst.mData[i] = ck::type_convert<float>(src.mData[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -71,11 +71,12 @@ ConvParams::ConvParams(ck::index_t n_dim,
|
||||
input_left_pads(left_pads),
|
||||
input_right_pads(right_pads)
|
||||
{
|
||||
if(filter_spatial_lengths.size() != num_dim_spatial ||
|
||||
input_spatial_lengths.size() != num_dim_spatial ||
|
||||
conv_filter_strides.size() != num_dim_spatial ||
|
||||
conv_filter_dilations.size() != num_dim_spatial ||
|
||||
input_left_pads.size() != num_dim_spatial || input_right_pads.size() != num_dim_spatial)
|
||||
if(ck::type_convert<ck::index_t>(filter_spatial_lengths.size()) != num_dim_spatial ||
|
||||
ck::type_convert<ck::index_t>(input_spatial_lengths.size()) != num_dim_spatial ||
|
||||
ck::type_convert<ck::index_t>(conv_filter_strides.size()) != num_dim_spatial ||
|
||||
ck::type_convert<ck::index_t>(conv_filter_dilations.size()) != num_dim_spatial ||
|
||||
ck::type_convert<ck::index_t>(input_left_pads.size()) != num_dim_spatial ||
|
||||
ck::type_convert<ck::index_t>(input_right_pads.size()) != num_dim_spatial)
|
||||
{
|
||||
throw(
|
||||
std::runtime_error("ConvParams::GetOutputSpatialLengths: "
|
||||
@@ -85,11 +86,12 @@ ConvParams::ConvParams(ck::index_t n_dim,
|
||||
|
||||
std::vector<ck::index_t> ConvParams::GetOutputSpatialLengths() const
|
||||
{
|
||||
if(filter_spatial_lengths.size() != num_dim_spatial ||
|
||||
input_spatial_lengths.size() != num_dim_spatial ||
|
||||
conv_filter_strides.size() != num_dim_spatial ||
|
||||
conv_filter_dilations.size() != num_dim_spatial ||
|
||||
input_left_pads.size() != num_dim_spatial || input_right_pads.size() != num_dim_spatial)
|
||||
if(ck::type_convert<ck::index_t>(filter_spatial_lengths.size()) != num_dim_spatial ||
|
||||
ck::type_convert<ck::index_t>(input_spatial_lengths.size()) != num_dim_spatial ||
|
||||
ck::type_convert<ck::index_t>(conv_filter_strides.size()) != num_dim_spatial ||
|
||||
ck::type_convert<ck::index_t>(conv_filter_dilations.size()) != num_dim_spatial ||
|
||||
ck::type_convert<ck::index_t>(input_left_pads.size()) != num_dim_spatial ||
|
||||
ck::type_convert<ck::index_t>(input_right_pads.size()) != num_dim_spatial)
|
||||
{
|
||||
throw(
|
||||
std::runtime_error("ConvParams::GetOutputSpatialLengths: "
|
||||
|
||||
Reference in New Issue
Block a user