Conv perlayer int8 quantization (#471)

* Add conv2d requant example

* Fix bash error

* Rename example

* 1. Rename gemm quantization
2. shares the requantization lambda function with conv

* Refine declare type

* Add conv bias relu quantization exmaple

* clang format

* Fix compile error due to merge develop

* Fix CI error

* Extract quantization post operation into another file

* Support quantization for non piecewise linear function

* Add instance for conv quantization

* Add convolution quantization factory

* Add convolution quantization client example

* Add more instances with different template parameters

* clang format

* Sync the naming with the develop
This commit is contained in:
rocking5566
2022-11-03 03:56:26 +08:00
committed by GitHub
parent 8ee36118be
commit 226bc02b73
20 changed files with 1553 additions and 35 deletions

View File

@@ -7,6 +7,7 @@
#include "ck/utility/math_v2.hpp"
#include "ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/element/quantization_operation.hpp"
namespace ck {
namespace tensor_operation {

View File

@@ -0,0 +1,86 @@
#pragma once
#include "ck/utility/data_type.hpp"
namespace ck {
namespace tensor_operation {
namespace element_wise {
// For Activation function which is piecewise linear function, such as relu, leaky relu ...etc
template <typename Activation>
struct Activation_Mul_Clamp
{
Activation_Mul_Clamp(float multiplier, Activation activationOp)
: multiplier_(multiplier), activationOp_(activationOp)
{
}
__host__ __device__ constexpr void operator()(int8_t& y, const int32_t& x) const
{
float x_fp32 = ck::type_convert<float>(x);
activationOp_(x_fp32, x_fp32);
float y_fp32 = math::clamp(multiplier_ * x_fp32, -128.f, 127.f);
y = ck::type_convert<int8_t>(y_fp32);
}
__host__ __device__ constexpr void operator()(float& y, const int32_t& x) const
{
// We might type_convert to int8 after lambda in someplace
float x_fp32 = ck::type_convert<float>(x);
activationOp_(x_fp32, x_fp32);
y = math::clamp(multiplier_ * x_fp32, -128.f, 127.f);
}
float multiplier_;
Activation activationOp_;
};
// For Activation function which is piecewise linear function, such as relu, leaky relu ...etc
template <typename Activation>
struct Add_Activation_Mul_Clamp
{
Add_Activation_Mul_Clamp(float multiplier, Activation activationOp)
: multiplier_(multiplier), activationOp_(activationOp)
{
}
__host__ __device__ constexpr void
operator()(int8_t& y, const int32_t& x1, const int32_t& x2) const
{
float y_fp32 = ck::type_convert<float>(x1 + x2);
activationOp_(y_fp32, y_fp32);
y_fp32 = math::clamp(multiplier_ * y_fp32, -128.f, 127.f);
y = ck::type_convert<int8_t>(y_fp32);
}
float multiplier_;
Activation activationOp_;
};
// For Activation function which is non piecewise linear function, such as TanH, Sigmoid ...etc
template <typename Activation>
struct Add_Mul_Activation_Mul_Clamp
{
Add_Mul_Activation_Mul_Clamp(float multiplier1, float multiplier2, Activation activationOp)
: multiplier1_(multiplier1), multiplier2_(multiplier2), activationOp_(activationOp)
{
}
__host__ __device__ constexpr void
operator()(int8_t& y, const int32_t& x1, const int32_t& x2) const
{
float y_fp32 = ck::type_convert<float>(x1 + x2);
y_fp32 = multiplier1_ * y_fp32;
activationOp_(y_fp32, y_fp32);
y_fp32 = math::clamp(multiplier2_ * y_fp32, -128.f, 127.f);
y = ck::type_convert<int8_t>(y_fp32);
}
float multiplier1_;
float multiplier2_;
Activation activationOp_;
};
} // namespace element_wise
} // namespace tensor_operation
} // namespace ck

View File

@@ -4,6 +4,7 @@
#pragma once
#include "ck/utility/data_type.hpp"
#include "ck/utility/math.hpp"
#include "ck/utility/math_v2.hpp"
namespace ck {