mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-21 21:39:15 +00:00
Testing all fwd convolution specializations. (#259)
* UniformFill with integer values.
* Log tested instance type string.
* Add UT for all convolution specializations.
* debugging conv
* Fix dangling reference bug.
* Small refinements.
* Fix call to error checking function.
* Small refinements to tests.
* Configure error tolerance
* Change problem size.
* Remove OddC case from types that do not support it.
* Add helper traits for AccumulatorDataType.
* Print first 5 errs in check_err for integral types.
* Rename FillUniform to FillUniformDistribution
* Refactor
* Do not use typed tests.
* Instead use plain fixture class with templatized member functions.
* Initialize tensors with integer values.
* Refine test instances.
* Properly set accumulator data type.
* Add another "big" instance.
* Refactor convolution tests.
* Revert "debugging conv"
This reverts commit b109516455.
* Add pragma once + format + small refinement.
* Fix some unwanted changes.
* Clang-format
* Fix profile_convnd to use renamed tensor initializer.
* Add instances for ConvFWDND kernel case 2D
* Helpers to get ConvNDFwd 2D instances.
* Refactoring.
* Remove "small block" instance as it was generating compiler errors.
* Remove default template parameters values.
* Refine and fix test.
* Fix problem with default template parameter types.
* Adjust error thresholds for floating point values test.
* Use integer values initialization for instances test.
* Add tests for ConvNDFwd 2D case.
* Remove AccumulatorDataType type trait.
* Update unit-tests.
* Remove operator<< overload.
* Unlock conv1d/3d nd fwd instances.
* Enable skipping calculating reference using flag.
* Fix number of channels for first ResNet50 layer.
* Clang-format.
Co-authored-by: Adam Osewski <aosewski@amd.com>
Co-authored-by: Chao Liu <chao.liu2@amd.com>
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
#ifndef CHECK_ERR_HPP
|
||||
#define CHECK_ERR_HPP
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
@@ -169,17 +168,34 @@ check_err(const std::vector<T>& out,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool res{true};
|
||||
int err_count = 0;
|
||||
int64_t err = 0;
|
||||
int64_t max_err = std::numeric_limits<int64_t>::min();
|
||||
for(std::size_t i = 0; i < ref.size(); ++i)
|
||||
{
|
||||
if(out[i] != ref[i])
|
||||
int64_t o = out[i];
|
||||
int64_t r = ref[i];
|
||||
err = std::abs(o - r);
|
||||
|
||||
if(err > 0)
|
||||
{
|
||||
std::cout << "out[" << i << "] != ref[" << i << "]: " << static_cast<int>(out[i])
|
||||
<< " != " << static_cast<int>(ref[i]) << std::endl
|
||||
<< msg << std::endl;
|
||||
return false;
|
||||
max_err = err > max_err ? err : max_err;
|
||||
err_count++;
|
||||
if(err_count < 5)
|
||||
{
|
||||
std::cout << "out[" << i << "] != ref[" << i << "]: " << static_cast<int>(out[i])
|
||||
<< " != " << static_cast<int>(ref[i]) << std::endl
|
||||
<< msg << std::endl;
|
||||
}
|
||||
res = false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
if(!res)
|
||||
{
|
||||
std::cout << "max err: " << max_err << std::endl;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
} // namespace utils
|
||||
@@ -191,5 +207,3 @@ std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
|
||||
std::copy(std::begin(v), std::end(v), std::ostream_iterator<T>(os, " "));
|
||||
return os;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -402,8 +402,8 @@ template <typename InDataType,
|
||||
typename InElementwiseOp = ck::tensor_operation::element_wise::PassThrough,
|
||||
typename WeiElementwiseOp = ck::tensor_operation::element_wise::PassThrough,
|
||||
typename OutElementwiseOp = ck::tensor_operation::element_wise::PassThrough,
|
||||
typename InputInitFun = FillUniform<InDataType>,
|
||||
typename WeightsInitFun = FillUniform<WeiDataType>>
|
||||
typename InputInitFun = FillUniformDistribution<InDataType>,
|
||||
typename WeightsInitFun = FillUniformDistribution<WeiDataType>>
|
||||
class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType, WeiDataType>
|
||||
{
|
||||
using DeviceConvFwdOp = tensor_operation::device::
|
||||
@@ -422,8 +422,8 @@ class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType,
|
||||
|
||||
ConvFwdOpInstance(const ConvParams& params,
|
||||
bool do_init = true,
|
||||
const InputInitFun& input_init_f = InputInitFun{},
|
||||
const WeightsInitFun& weights_init_f = WeightsInitFun{})
|
||||
const InputInitFun& input_init_f = InputInitFun(),
|
||||
const WeightsInitFun& weights_init_f = WeightsInitFun())
|
||||
: BaseType(),
|
||||
params_{params},
|
||||
output_spatial_lengths_{params.GetOutputSpatialLengths()},
|
||||
@@ -560,8 +560,8 @@ class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType,
|
||||
const ConvParams& params_;
|
||||
const std::vector<ck::index_t> output_spatial_lengths_;
|
||||
const bool do_init_;
|
||||
const InputInitFun& input_init_f_;
|
||||
const WeightsInitFun& weights_init_f_;
|
||||
InputInitFun input_init_f_;
|
||||
WeightsInitFun weights_init_f_;
|
||||
};
|
||||
|
||||
} // namespace conv
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <random>
|
||||
|
||||
#include "data_type.hpp"
|
||||
@@ -8,46 +9,56 @@
|
||||
namespace ck {
|
||||
namespace utils {
|
||||
|
||||
// template <typename T, class Enable = void>
|
||||
// struct FillUniform;
|
||||
|
||||
// TODO: what's wrong with this specialization???
|
||||
// err: segmentation fault in mt19937 - infinite loop like.
|
||||
// template <typename T>
|
||||
// struct FillUniform<T, typename std::enable_if<std::is_integral<T>::value &&
|
||||
// !std::is_same<T, bhalf_t>::value>::type>
|
||||
// {
|
||||
// int a_{0};
|
||||
// int b_{5};
|
||||
// // T a_ = T{0};
|
||||
// // T b_ = T{5};
|
||||
|
||||
// template <typename ForwardIter>
|
||||
// void operator()(ForwardIter first, ForwardIter last) const
|
||||
// {
|
||||
// std::mt19937 gen{11939};
|
||||
// std::uniform_int_distribution<int> dis(a_, b_);
|
||||
// std::generate(first, last, [&dis, &gen]() { return ck::type_convert<T>(dis(gen)); });
|
||||
// }
|
||||
// };
|
||||
|
||||
// struct FillUniform<T, typename std::enable_if<std::is_floating_point<T>::value ||
|
||||
// std::is_same<T, bhalf_t>::value>::type>
|
||||
template <typename T>
|
||||
struct FillUniform
|
||||
struct FillUniformDistribution
|
||||
{
|
||||
float a_{0};
|
||||
float b_{5};
|
||||
float a_{-5.f};
|
||||
float b_{5.f};
|
||||
|
||||
template <typename ForwardIter>
|
||||
void operator()(ForwardIter first, ForwardIter last) const
|
||||
{
|
||||
std::mt19937 gen{11939};
|
||||
std::uniform_real_distribution<> dis(a_, b_);
|
||||
std::mt19937 gen(11939);
|
||||
std::uniform_real_distribution<float> dis(a_, b_);
|
||||
std::generate(first, last, [&dis, &gen]() { return ck::type_convert<T>(dis(gen)); });
|
||||
}
|
||||
};
|
||||
|
||||
// Normally FillUniformDistributionIntegerValue should use std::uniform_int_distribution as below.
|
||||
// However, this produces segfaults in std::mt19937 that look like an infinite loop.
|
||||
// template <typename T>
|
||||
// struct FillUniformDistributionIntegerValue
|
||||
// {
|
||||
// int a_{-5};
|
||||
// int b_{5};
|
||||
//
|
||||
// template <typename ForwardIter>
|
||||
// void operator()(ForwardIter first, ForwardIter last) const
|
||||
// {
|
||||
// std::mt19937 gen(11939);
|
||||
// std::uniform_int_distribution<int> dis(a_, b_);
|
||||
// std::generate(
|
||||
// first, last, [&dis, &gen]() { return ck::type_convert<T>(dis(gen)); });
|
||||
// }
|
||||
// };
|
||||
|
||||
// Workaround for uniform_int_distribution not working as expected. See note above.
|
||||
template <typename T>
|
||||
struct FillUniformDistributionIntegerValue
|
||||
{
|
||||
float a_{-5.f};
|
||||
float b_{5.f};
|
||||
|
||||
template <typename ForwardIter>
|
||||
void operator()(ForwardIter first, ForwardIter last) const
|
||||
{
|
||||
std::mt19937 gen(11939);
|
||||
std::uniform_real_distribution<float> dis(a_, b_);
|
||||
std::generate(
|
||||
first, last, [&dis, &gen]() { return ck::type_convert<T>(std::round(dis(gen))); });
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct FillMonotonicSeq
|
||||
{
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
@@ -78,7 +79,8 @@ class OpInstanceRunEngine
|
||||
|
||||
template <typename ReferenceOp = std::function<void()>>
|
||||
OpInstanceRunEngine(const OpInstanceT& op_instance,
|
||||
const ReferenceOp& reference_op = ReferenceOp{})
|
||||
const ReferenceOp& reference_op = ReferenceOp{},
|
||||
bool do_verification = true)
|
||||
: op_instance_{op_instance}
|
||||
{
|
||||
in_tensors_ = op_instance_.GetInputTensors();
|
||||
@@ -88,8 +90,11 @@ class OpInstanceRunEngine
|
||||
const Tensor<InArgTypes>&...,
|
||||
Tensor<OutDataType>&>)
|
||||
{
|
||||
ref_output_ = op_instance_.GetOutputTensor();
|
||||
CallRefOpUnpackArgs(reference_op, std::make_index_sequence<kNInArgs_>{});
|
||||
if(do_verification)
|
||||
{
|
||||
ref_output_ = op_instance_.GetOutputTensor();
|
||||
CallRefOpUnpackArgs(reference_op, std::make_index_sequence<kNInArgs_>{});
|
||||
}
|
||||
}
|
||||
AllocateDeviceInputTensors(std::make_index_sequence<kNInArgs_>{});
|
||||
out_device_buffer_ =
|
||||
@@ -110,6 +115,7 @@ class OpInstanceRunEngine
|
||||
op_ptr.get(), in_device_buffers_, out_device_buffer_);
|
||||
if(op_ptr->IsSupportedArgument(argument.get()))
|
||||
{
|
||||
std::cout << "Testing instance: " << op_ptr->GetTypeString() << std::endl;
|
||||
invoker->Run(argument.get());
|
||||
out_device_buffer_->FromDevice(out_tensor_->mData.data());
|
||||
if(!ref_output_)
|
||||
@@ -119,9 +125,16 @@ class OpInstanceRunEngine
|
||||
" You have to provide reference function.");
|
||||
}
|
||||
// TODO: enable flexible use of custom check_error functions
|
||||
res = res && check_err(out_tensor_->mData, ref_output_->mData);
|
||||
bool inst_res = CheckErr(out_tensor_->mData, ref_output_->mData);
|
||||
std::cout << (inst_res ? "SUCCESS" : "FAILURE") << std::endl;
|
||||
res = res && inst_res;
|
||||
out_device_buffer_->SetZero();
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Given conv problem is not supported by instance: \n\t>>>>"
|
||||
<< op_ptr->GetTypeString() << std::endl;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
@@ -132,7 +145,6 @@ class OpInstanceRunEngine
|
||||
bool do_verification = false,
|
||||
bool do_log = false)
|
||||
{
|
||||
bool res{true};
|
||||
ProfileBestConfig best_config;
|
||||
|
||||
for(auto& op_ptr : op_ptrs)
|
||||
@@ -153,7 +165,7 @@ class OpInstanceRunEngine
|
||||
std::cout << "Perf: " << avg_time << " ms, " << tflops << " TFlops, " << gb_per_sec
|
||||
<< " GB/s, " << op_name << std::endl;
|
||||
|
||||
if(tflops < best_config.best_tflops)
|
||||
if(avg_time < best_config.best_avg_time)
|
||||
{
|
||||
best_config.best_op_name = op_name;
|
||||
best_config.best_tflops = tflops;
|
||||
@@ -171,7 +183,7 @@ class OpInstanceRunEngine
|
||||
" You have to provide reference function.");
|
||||
}
|
||||
// TODO: enable flexible use of custom check_error functions
|
||||
res = res && CheckErr(out_tensor_->mData, ref_output_->mData);
|
||||
CheckErr(out_tensor_->mData, ref_output_->mData);
|
||||
|
||||
if(do_log) {}
|
||||
}
|
||||
@@ -223,7 +235,7 @@ class OpInstanceRunEngine
|
||||
template <typename T>
|
||||
bool CheckErr(const std::vector<T>& dev_out, const std::vector<T>& ref_out) const
|
||||
{
|
||||
return ck::utils::check_err(dev_out, ref_out, "Error: incorrect results!", atol_, rtol_);
|
||||
return ck::utils::check_err(dev_out, ref_out, "Error: incorrect results!", rtol_, atol_);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user