Testing all fwd convolution specializations. (#259)

* UniforFill with integer values.

* Log tested instance type string.

* Add UT for all convolution specializations.

* debugging conv

* Fix dangling reference bug.

* Small refinements.

* Fix call to error checking function.

* Small refinements to tests.

* Configure error tolerance
* Change problem size.
* Remove OddC case from types that do not support it.

* Add helper traits for AccumulatorDataType.

* Print first 5 errs in check_err for integral types.

* Rename FillUniform to FillUniformDistribution

* Refactor

* Do not use typed tests.
* Instead use plain fixture class with templatized member functions.
* Initialize tensors with integer values.

* Refine test instances.

* Properly set accumulator data type.
* Add another "big" instance.

* Refactor convolution tests.

* Revert "debugging conv"

This reverts commit b109516455.

* Add pragma once + format + small refinement.

* Fix some unwanted changes.

* Clang-format

* Fix profile_convnd to use renamed tensor initializer.

* Add instances for ConvFWDND kernel case 2D

* Helpers to get ConvNDFwd 2D instances.

* Refactoring.

* Remove "small block" instance as it was generating compiler errors.
* Remove default template parameters values.

* Refine and fix test.

* Fix problem with default template parameter types.
* Adjust error thresholds for floating point values test.
* Use integer values initialization for instances test.
* Add tests for ConvNDFwd 2D case.

* Remove AccumulatorDataType type trait.

* Update unit-tests.

* Remove operator<< overload.

* Unlock conv1d/3d nd fwd instances.

* Enable skipping calculating reference using flag.

* Fix number of channels for first ResNet50 layer.

* Clang-format.

Co-authored-by: Adam Osewski <aosewski@amd.com>
Co-authored-by: Chao Liu <chao.liu2@amd.com>
This commit is contained in:
Adam Osewski
2022-06-23 05:05:04 +02:00
committed by GitHub
parent 4634b12043
commit a2edd7d802
20 changed files with 1219 additions and 268 deletions

View File

@@ -1,5 +1,4 @@
#ifndef CHECK_ERR_HPP
#define CHECK_ERR_HPP
#pragma once
#include <algorithm>
#include <cmath>
@@ -169,17 +168,34 @@ check_err(const std::vector<T>& out,
return false;
}
bool res{true};
int err_count = 0;
int64_t err = 0;
int64_t max_err = std::numeric_limits<int64_t>::min();
for(std::size_t i = 0; i < ref.size(); ++i)
{
if(out[i] != ref[i])
int64_t o = out[i];
int64_t r = ref[i];
err = std::abs(o - r);
if(err > 0)
{
std::cout << "out[" << i << "] != ref[" << i << "]: " << static_cast<int>(out[i])
<< " != " << static_cast<int>(ref[i]) << std::endl
<< msg << std::endl;
return false;
max_err = err > max_err ? err : max_err;
err_count++;
if(err_count < 5)
{
std::cout << "out[" << i << "] != ref[" << i << "]: " << static_cast<int>(out[i])
<< " != " << static_cast<int>(ref[i]) << std::endl
<< msg << std::endl;
}
res = false;
}
}
return true;
if(!res)
{
std::cout << "max err: " << max_err << std::endl;
}
return res;
}
} // namespace utils
@@ -191,5 +207,3 @@ std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
std::copy(std::begin(v), std::end(v), std::ostream_iterator<T>(os, " "));
return os;
}
#endif

View File

@@ -402,8 +402,8 @@ template <typename InDataType,
typename InElementwiseOp = ck::tensor_operation::element_wise::PassThrough,
typename WeiElementwiseOp = ck::tensor_operation::element_wise::PassThrough,
typename OutElementwiseOp = ck::tensor_operation::element_wise::PassThrough,
typename InputInitFun = FillUniform<InDataType>,
typename WeightsInitFun = FillUniform<WeiDataType>>
typename InputInitFun = FillUniformDistribution<InDataType>,
typename WeightsInitFun = FillUniformDistribution<WeiDataType>>
class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType, WeiDataType>
{
using DeviceConvFwdOp = tensor_operation::device::
@@ -422,8 +422,8 @@ class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType,
ConvFwdOpInstance(const ConvParams& params,
bool do_init = true,
const InputInitFun& input_init_f = InputInitFun{},
const WeightsInitFun& weights_init_f = WeightsInitFun{})
const InputInitFun& input_init_f = InputInitFun(),
const WeightsInitFun& weights_init_f = WeightsInitFun())
: BaseType(),
params_{params},
output_spatial_lengths_{params.GetOutputSpatialLengths()},
@@ -560,8 +560,8 @@ class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType,
const ConvParams& params_;
const std::vector<ck::index_t> output_spatial_lengths_;
const bool do_init_;
const InputInitFun& input_init_f_;
const WeightsInitFun& weights_init_f_;
InputInitFun input_init_f_;
WeightsInitFun weights_init_f_;
};
} // namespace conv

View File

@@ -1,6 +1,7 @@
#pragma once
#include <algorithm>
#include <cmath>
#include <random>
#include "data_type.hpp"
@@ -8,46 +9,56 @@
namespace ck {
namespace utils {
// template <typename T, class Enable = void>
// struct FillUniform;
// TODO: what's wrong with this specialization???
// err: segmentation fault in mt19937 - infinite loop like.
// template <typename T>
// struct FillUniform<T, typename std::enable_if<std::is_integral<T>::value &&
// !std::is_same<T, bhalf_t>::value>::type>
// {
// int a_{0};
// int b_{5};
// // T a_ = T{0};
// // T b_ = T{5};
// template <typename ForwardIter>
// void operator()(ForwardIter first, ForwardIter last) const
// {
// std::mt19937 gen{11939};
// std::uniform_int_distribution<int> dis(a_, b_);
// std::generate(first, last, [&dis, &gen]() { return ck::type_convert<T>(dis(gen)); });
// }
// };
// struct FillUniform<T, typename std::enable_if<std::is_floating_point<T>::value ||
// std::is_same<T, bhalf_t>::value>::type>
template <typename T>
struct FillUniform
struct FillUniformDistribution
{
float a_{0};
float b_{5};
float a_{-5.f};
float b_{5.f};
template <typename ForwardIter>
void operator()(ForwardIter first, ForwardIter last) const
{
std::mt19937 gen{11939};
std::uniform_real_distribution<> dis(a_, b_);
std::mt19937 gen(11939);
std::uniform_real_distribution<float> dis(a_, b_);
std::generate(first, last, [&dis, &gen]() { return ck::type_convert<T>(dis(gen)); });
}
};
// Normally FillUniformDistributionIntegerValue should use std::uniform_int_distribution as below.
// However this produces segfaults in std::mt19937 which look like inifite loop.
// template <typename T>
// struct FillUniformDistributionIntegerValue
// {
// int a_{-5};
// int b_{5};
//
// template <typename ForwardIter>
// void operator()(ForwardIter first, ForwardIter last) const
// {
// std::mt19937 gen(11939);
// std::uniform_int_distribution<int> dis(a_, b_);
// std::generate(
// first, last, [&dis, &gen]() { return ck::type_convert<T>(dis(gen)); });
// }
// };
// Workaround for uniform_int_distribution not working as expected. See note above.<
template <typename T>
struct FillUniformDistributionIntegerValue
{
float a_{-5.f};
float b_{5.f};
template <typename ForwardIter>
void operator()(ForwardIter first, ForwardIter last) const
{
std::mt19937 gen(11939);
std::uniform_real_distribution<float> dis(a_, b_);
std::generate(
first, last, [&dis, &gen]() { return ck::type_convert<T>(std::round(dis(gen))); });
}
};
template <typename T>
struct FillMonotonicSeq
{

View File

@@ -1,6 +1,7 @@
#pragma once
#include <cstdlib>
#include <iostream>
#include <limits>
#include <memory>
#include <stdexcept>
@@ -78,7 +79,8 @@ class OpInstanceRunEngine
template <typename ReferenceOp = std::function<void()>>
OpInstanceRunEngine(const OpInstanceT& op_instance,
const ReferenceOp& reference_op = ReferenceOp{})
const ReferenceOp& reference_op = ReferenceOp{},
bool do_verification = true)
: op_instance_{op_instance}
{
in_tensors_ = op_instance_.GetInputTensors();
@@ -88,8 +90,11 @@ class OpInstanceRunEngine
const Tensor<InArgTypes>&...,
Tensor<OutDataType>&>)
{
ref_output_ = op_instance_.GetOutputTensor();
CallRefOpUnpackArgs(reference_op, std::make_index_sequence<kNInArgs_>{});
if(do_verification)
{
ref_output_ = op_instance_.GetOutputTensor();
CallRefOpUnpackArgs(reference_op, std::make_index_sequence<kNInArgs_>{});
}
}
AllocateDeviceInputTensors(std::make_index_sequence<kNInArgs_>{});
out_device_buffer_ =
@@ -110,6 +115,7 @@ class OpInstanceRunEngine
op_ptr.get(), in_device_buffers_, out_device_buffer_);
if(op_ptr->IsSupportedArgument(argument.get()))
{
std::cout << "Testing instance: " << op_ptr->GetTypeString() << std::endl;
invoker->Run(argument.get());
out_device_buffer_->FromDevice(out_tensor_->mData.data());
if(!ref_output_)
@@ -119,9 +125,16 @@ class OpInstanceRunEngine
" You have to provide reference function.");
}
// TODO: enable flexible use of custom check_error functions
res = res && check_err(out_tensor_->mData, ref_output_->mData);
bool inst_res = CheckErr(out_tensor_->mData, ref_output_->mData);
std::cout << (inst_res ? "SUCCESS" : "FAILURE") << std::endl;
res = res && inst_res;
out_device_buffer_->SetZero();
}
else
{
std::cout << "Given conv problem is not supported by instance: \n\t>>>>"
<< op_ptr->GetTypeString() << std::endl;
}
}
return res;
}
@@ -132,7 +145,6 @@ class OpInstanceRunEngine
bool do_verification = false,
bool do_log = false)
{
bool res{true};
ProfileBestConfig best_config;
for(auto& op_ptr : op_ptrs)
@@ -153,7 +165,7 @@ class OpInstanceRunEngine
std::cout << "Perf: " << avg_time << " ms, " << tflops << " TFlops, " << gb_per_sec
<< " GB/s, " << op_name << std::endl;
if(tflops < best_config.best_tflops)
if(avg_time < best_config.best_avg_time)
{
best_config.best_op_name = op_name;
best_config.best_tflops = tflops;
@@ -171,7 +183,7 @@ class OpInstanceRunEngine
" You have to provide reference function.");
}
// TODO: enable flexible use of custom check_error functions
res = res && CheckErr(out_tensor_->mData, ref_output_->mData);
CheckErr(out_tensor_->mData, ref_output_->mData);
if(do_log) {}
}
@@ -223,7 +235,7 @@ class OpInstanceRunEngine
template <typename T>
bool CheckErr(const std::vector<T>& dev_out, const std::vector<T>& ref_out) const
{
return ck::utils::check_err(dev_out, ref_out, "Error: incorrect results!", atol_, rtol_);
return ck::utils::check_err(dev_out, ref_out, "Error: incorrect results!", rtol_, atol_);
}
};