mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 21:21:22 +00:00
Conv perlayer int8 quantization (#471)
* Add conv2d requant example * Fix bash error * Rename example * 1. Rename gemm quantization 2. Share the requantization lambda function with conv * Refine declare type * Add conv bias relu quantization example * clang format * Fix compile error due to merge develop * Fix CI error * Extract quantization post operation into another file * Support quantization for non piecewise linear function * Add instance for conv quantization * Add convolution quantization factory * Add convolution quantization client example * Add more instances with different template parameters * clang format * Sync the naming with the develop
This commit is contained in:
@@ -7,6 +7,7 @@
|
||||
#include "ck/utility/math_v2.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/quantization_operation.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
|
||||
@@ -0,0 +1,86 @@
|
||||
#pragma once
|
||||
|
||||
#include "ck/utility/data_type.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace element_wise {
|
||||
|
||||
// For Activation function which is piecewise linear function, such as relu, leaky relu ...etc
|
||||
template <typename Activation>
|
||||
struct Activation_Mul_Clamp
|
||||
{
|
||||
Activation_Mul_Clamp(float multiplier, Activation activationOp)
|
||||
: multiplier_(multiplier), activationOp_(activationOp)
|
||||
{
|
||||
}
|
||||
|
||||
__host__ __device__ constexpr void operator()(int8_t& y, const int32_t& x) const
|
||||
{
|
||||
float x_fp32 = ck::type_convert<float>(x);
|
||||
activationOp_(x_fp32, x_fp32);
|
||||
float y_fp32 = math::clamp(multiplier_ * x_fp32, -128.f, 127.f);
|
||||
y = ck::type_convert<int8_t>(y_fp32);
|
||||
}
|
||||
|
||||
__host__ __device__ constexpr void operator()(float& y, const int32_t& x) const
|
||||
{
|
||||
// We might type_convert to int8 after lambda in someplace
|
||||
float x_fp32 = ck::type_convert<float>(x);
|
||||
activationOp_(x_fp32, x_fp32);
|
||||
y = math::clamp(multiplier_ * x_fp32, -128.f, 127.f);
|
||||
}
|
||||
|
||||
float multiplier_;
|
||||
Activation activationOp_;
|
||||
};
|
||||
|
||||
// For Activation function which is piecewise linear function, such as relu, leaky relu ...etc
|
||||
template <typename Activation>
|
||||
struct Add_Activation_Mul_Clamp
|
||||
{
|
||||
Add_Activation_Mul_Clamp(float multiplier, Activation activationOp)
|
||||
: multiplier_(multiplier), activationOp_(activationOp)
|
||||
{
|
||||
}
|
||||
|
||||
__host__ __device__ constexpr void
|
||||
operator()(int8_t& y, const int32_t& x1, const int32_t& x2) const
|
||||
{
|
||||
float y_fp32 = ck::type_convert<float>(x1 + x2);
|
||||
activationOp_(y_fp32, y_fp32);
|
||||
y_fp32 = math::clamp(multiplier_ * y_fp32, -128.f, 127.f);
|
||||
y = ck::type_convert<int8_t>(y_fp32);
|
||||
}
|
||||
|
||||
float multiplier_;
|
||||
Activation activationOp_;
|
||||
};
|
||||
|
||||
// For Activation function which is non piecewise linear function, such as TanH, Sigmoid ...etc
|
||||
template <typename Activation>
|
||||
struct Add_Mul_Activation_Mul_Clamp
|
||||
{
|
||||
Add_Mul_Activation_Mul_Clamp(float multiplier1, float multiplier2, Activation activationOp)
|
||||
: multiplier1_(multiplier1), multiplier2_(multiplier2), activationOp_(activationOp)
|
||||
{
|
||||
}
|
||||
|
||||
__host__ __device__ constexpr void
|
||||
operator()(int8_t& y, const int32_t& x1, const int32_t& x2) const
|
||||
{
|
||||
float y_fp32 = ck::type_convert<float>(x1 + x2);
|
||||
y_fp32 = multiplier1_ * y_fp32;
|
||||
activationOp_(y_fp32, y_fp32);
|
||||
y_fp32 = math::clamp(multiplier2_ * y_fp32, -128.f, 127.f);
|
||||
y = ck::type_convert<int8_t>(y_fp32);
|
||||
}
|
||||
|
||||
float multiplier1_;
|
||||
float multiplier2_;
|
||||
Activation activationOp_;
|
||||
};
|
||||
|
||||
} // namespace element_wise
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -4,6 +4,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "ck/utility/data_type.hpp"
|
||||
#include "ck/utility/math.hpp"
|
||||
#include "ck/utility/math_v2.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
Reference in New Issue
Block a user