mirror of https://github.com/ROCm/composable_kernel.git
Conv + quantization + tanh (#645)
* Rename file. Prepare to support another activation
* Add comment for quantization
* Extract out_elementop
* Add tanh example
* Add conv + bias + tanh quantization instance
* Add missing parameter
* Refine cmake
* Add external api and client example
* Extract variable in example
* Fix the comment

Co-authored-by: zjing14 <zhangjing14@gmail.com>
@@ -7,10 +7,30 @@ namespace ck {

namespace tensor_operation {
namespace element_wise {

// Y = Sy * Qy
// W = Sw * Qw
// X = Sx * Qx
// B = Sb * Qb = Sw * Sx * Qb
// where X, W, Y are float32 and Qx, Qw, Qy are int8.
// Sx, Sw, Sy are the float32 scales of x, w, y, calculated from each tensor's
// quantization range. Qb is int32; the scale of B is fixed to Sw * Sx for
// convenience.

// Y = W @ X, where @ is convolution or matrix multiplication
// Sy * Qy = (Sw * Qw) @ (Sx * Qx)
// Qy = [(Sw * Sx) / Sy] * (Qw @ Qx)

// For activation functions that are piecewise linear, such as relu and leaky relu:
// Activation(Sy * Qy) = Sy * Activation(Qy)
template <typename Activation>
struct Activation_Mul_Clamp
{
    // Convolution + activation (piecewise linear function)
    // Since the activation is piecewise linear, Activation(Sy * Qy) = Sy * Activation(Qy).
    // Z = Activation(Y) = Activation(W @ X)
    // Sz * Qz = Activation(Sy * Qy)
    // Qz = (Sy / Sz) * Activation(Qy) = [(Sw * Sx) / Sz] * Activation(Qw @ Qx)

    // requantScale_ = Sw * Sx / Sz
    Activation_Mul_Clamp(float requantScale, Activation activationOp)
        : requantScale_(requantScale), activationOp_(activationOp)
    {

@@ -45,8 +65,39 @@ struct Activation_Mul_Clamp

    Activation activationOp_;
};
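
// ---------------------------------------------------------------------------
// Editorial sketch (not part of this commit): standalone host-side C++ showing
// the arithmetic Activation_Mul_Clamp performs on one accumulator value,
// Qz = [(Sw * Sx) / Sz] * Activation(Qw @ Qx), using relu as the piecewise
// linear activation. All scale values below are hypothetical.
#include <algorithm>
#include <cstdint>
#include <cstdio>

inline int8_t requant_relu_example()
{
    const float Sw = 0.02f, Sx = 0.05f, Sz = 0.1f; // hypothetical scales
    const float requantScale = Sw * Sx / Sz;       // plays the role of requantScale_
    const int32_t acc        = 1234;               // stands in for Qw @ Qx
    float y = static_cast<float>(acc);
    y       = std::max(y, 0.f);                                    // Activation(Qy): relu
    y       = std::min(std::max(requantScale * y, -128.f), 127.f); // mul + clamp
    const auto qz = static_cast<int8_t>(y); // 1234 * 0.01 -> 12
    std::printf("Qz = %d\n", qz);
    return qz;
}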

// For activation functions that are not piecewise linear, such as TanH and
// Sigmoid. If an activation is not a piecewise linear function,
// then Activation(Sy * Qy) != Sy * Activation(Qy)
template <typename Activation>
struct Mul_Activation_Mul_Clamp
{
    // Convolution + activation (non piecewise linear function)
    // Z = Activation(Y) = Activation(W @ X)
    // Sz * Qz = Activation(Sy * Qy)
    // Qz = S1 * Activation[Sacc * (Qw @ Qx)]
    // where S1 = 1 / Sz and Sacc = Sw * Sx
    Mul_Activation_Mul_Clamp(float scale_z_inv, float scaleAcc, Activation activationOp)
        : scale_z_inv_(scale_z_inv), scaleAcc_(scaleAcc), activationOp_(activationOp)
    {
    }

    __host__ __device__ constexpr void operator()(int8_t& y, const int32_t& x) const
    {
        // Rescale the accumulator to real units, apply the activation, then
        // requantize with 1 / Sz and clamp to the int8 range.
        float y_fp32 = ck::type_convert<float>(x);
        y_fp32       = scaleAcc_ * y_fp32;
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(scale_z_inv_ * y_fp32, -128.f, 127.f);
        y      = ck::type_convert<int8_t>(y_fp32);
    }

    float scale_z_inv_;
    float scaleAcc_;
    Activation activationOp_;
};
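
// ---------------------------------------------------------------------------
// Editorial sketch (not part of this commit): for a non piecewise linear
// activation, tanh(Sy * Qy) != Sy * tanh(Qy), so the int32 accumulator must be
// rescaled to real units *before* the activation, exactly the order
// Mul_Activation_Mul_Clamp uses: Qz = (1 / Sz) * tanh(Sacc * acc).
// The scale values are hypothetical.
#include <algorithm>
#include <cmath>
#include <cstdint>

inline int8_t tanh_requant_example(int32_t acc)
{
    const float Sacc        = 0.001f; // Sw * Sx
    const float scale_z_inv = 128.f;  // 1 / Sz, with Sz chosen for tanh's [-1, 1] range
    float y = Sacc * static_cast<float>(acc); // rescale BEFORE the activation
    y       = std::tanh(y);                   // activation in real units
    y       = std::min(std::max(scale_z_inv * y, -128.f), 127.f);
    return static_cast<int8_t>(y);
}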

// Conv per-channel quantization + activation functions that are piecewise
// linear, such as relu and leaky relu:
// Activation(Sy * Qy) = Sy * Activation(Qy)
template <typename Activation>
struct Activation_Mul2_Clamp
{

@@ -76,9 +127,20 @@ struct Activation_Mul2_Clamp

};

// For activation functions that are piecewise linear, such as relu and leaky relu:
// Activation(Sy * Qy) = Sy * Activation(Qy)
template <typename Activation>
struct Add_Activation_Mul_Clamp
{
    // Convolution + bias
    // Let Bias = B = Sw * Sx * Qb, where Qb is int32.
    // Y = W @ X + B
    // Sy * Qy = (Sw * Qw) @ (Sx * Qx) + Sw * Sx * Qb
    // Qy = [(Sw * Sx) / Sy] * (Qw @ Qx + Qb)

    // For the activation, Z = Activation(Y)
    // Sz * Qz = Activation(Sy * Qy)
    // Qz = (Sy / Sz) * Activation(Qy) = [(Sw * Sx) / Sz] * Activation(Qw @ Qx + Qb)
    Add_Activation_Mul_Clamp(float requantScale, Activation activationOp)
        : requantScale_(requantScale), activationOp_(activationOp)
    {

@@ -139,11 +201,18 @@ struct Add_Activation_Mul2_Clamp

};
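
// ---------------------------------------------------------------------------
// Editorial sketch (not part of this commit): folding a float bias B into an
// int32 Qb under the fixed bias scale Sw * Sx, so that
// Qy = [(Sw * Sx) / Sy] * (Qw @ Qx + Qb) holds. Function name is hypothetical.
#include <cmath>
#include <cstdint>

inline int32_t quantize_bias_example(float B, float Sw, float Sx)
{
    // B = Sw * Sx * Qb  =>  Qb = round(B / (Sw * Sx))
    return static_cast<int32_t>(std::lround(B / (Sw * Sx)));
}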

// For activation functions that are not piecewise linear, such as TanH and
// Sigmoid. If an activation is not a piecewise linear function,
// then Activation(Sy * Qy) != Sy * Activation(Qy)
template <typename Activation>
struct Add_Mul_Activation_Mul_Clamp
{
    // Convolution + bias + activation (non piecewise linear function)
    // Z = Activation(Y) = Activation(W @ X + B)
    // Sz * Qz = Activation(Sy * Qy)
    // Qz = S1 * Activation[Sacc * (Qw @ Qx + Qb)]
    // where S1 = 1 / Sz and Sacc = Sw * Sx
    Add_Mul_Activation_Mul_Clamp(float scale_z_inv, float scaleAcc, Activation activationOp)
        : scale_z_inv_(scale_z_inv), scaleAcc_(scaleAcc), activationOp_(activationOp)
    {
    }

@@ -151,14 +220,64 @@ struct Add_Mul_Activation_Mul_Clamp

    operator()(int8_t& y, const int32_t& x, const int32_t& bias) const
    {
        float y_fp32 = ck::type_convert<float>(x + bias);
        y_fp32       = scaleAcc_ * y_fp32;
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(scale_z_inv_ * y_fp32, -128.f, 127.f);
        y      = ck::type_convert<int8_t>(y_fp32);
    }

    __host__ __device__ constexpr void
    operator()(int32_t& y, const int32_t& x, const int32_t& bias) const
    {
        // CAUTION - we might type_convert to int8 in the threadwise copy,
        // e.g. GridwiseGemmDlMultipleD_km_kn_mn
        float y_fp32 = ck::type_convert<float>(x + bias);
        y_fp32       = scaleAcc_ * y_fp32;
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(scale_z_inv_ * y_fp32, -128.f, 127.f);
        y      = ck::type_convert<int32_t>(y_fp32);
    }

    float scale_z_inv_;
    float scaleAcc_;
    Activation activationOp_;
};
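
// ---------------------------------------------------------------------------
// Editorial sketch (not part of this commit): the int32_t overload above keeps
// the result in int32 but has already clamped it to [-128, 127], so a later
// narrowing to int8 (e.g. in a threadwise copy) cannot overflow. A minimal
// model of that deferred narrowing; the function name is hypothetical.
#include <algorithm>
#include <cstdint>

inline int8_t deferred_narrowing_example(float y_fp32)
{
    const int32_t staged = static_cast<int32_t>(
        std::min(std::max(y_fp32, -128.f), 127.f)); // clamp now, stay int32
    return static_cast<int8_t>(staged);             // safe narrowing later
}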

// Conv per-channel quantization + activation functions that are not piecewise
// linear, such as TanH and Sigmoid.
// If an activation is not a piecewise linear function,
// then Activation(Sy * Qy) != Sy * Activation(Qy)
template <typename Activation>
struct Add_Mul2_Activation_Mul_Clamp
{
    Add_Mul2_Activation_Mul_Clamp(float scale_z_inv, Activation activationOp)
        : scale_z_inv_(scale_z_inv), activationOp_(activationOp)
    {
    }

    __host__ __device__ constexpr void
    operator()(int8_t& y, const int32_t& x, const int32_t& bias, const float& scaleAcc) const
    {
        // Per-channel: scaleAcc = Sw[k] * Sx is passed in per element.
        float y_fp32 = ck::type_convert<float>(x + bias);
        y_fp32       = scaleAcc * y_fp32;
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(scale_z_inv_ * y_fp32, -128.f, 127.f);
        y      = ck::type_convert<int8_t>(y_fp32);
    }

    __host__ __device__ constexpr void
    operator()(int32_t& y, const int32_t& x, const int32_t& bias, const float& scaleAcc) const
    {
        // CAUTION - we might type_convert to int8 in the threadwise copy,
        // e.g. GridwiseGemmDlMultipleD_km_kn_mn
        float y_fp32 = ck::type_convert<float>(x + bias);
        y_fp32       = scaleAcc * y_fp32;
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(scale_z_inv_ * y_fp32, -128.f, 127.f);
        y      = ck::type_convert<int32_t>(y_fp32);
    }

    float scale_z_inv_;
    Activation activationOp_;
};
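
// ---------------------------------------------------------------------------
// Editorial sketch (not part of this commit): with per-channel quantization,
// Sacc = Sw[k] * Sx differs per output channel k, which is why the Mul2
// variants receive scaleAcc through operator() instead of storing a single
// value. Array layout and scale values below are hypothetical.
#include <algorithm>
#include <cmath>
#include <cstdint>

inline void per_channel_tanh_example(const int32_t* acc, const float* Sw, float Sx,
                                     float scale_z_inv, int8_t* out, int channels)
{
    for(int k = 0; k < channels; ++k)
    {
        const float scaleAcc = Sw[k] * Sx; // per-channel accumulator scale
        float y = scaleAcc * static_cast<float>(acc[k]);
        y       = std::tanh(y); // non piecewise linear activation
        y       = std::min(std::max(scale_z_inv * y, -128.f), 127.f);
        out[k]  = static_cast<int8_t>(y);
    }
}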

@@ -320,6 +320,19 @@ struct Sigmoid
    int32_t divider_ = 1;
};

struct TanH
{
    template <typename T>
    __host__ __device__ void operator()(T& y, const T& x) const
    {
        static_assert(is_same<T, float>::value || is_same<T, double>::value ||
                          is_same<T, ck::half_t>::value,
                      "Data type is not supported by this operation!");

        y = ck::math::tanh(x);
    }
};
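
// ---------------------------------------------------------------------------
// Editorial sketch (not part of this commit): how the new TanH functor would
// compose with Mul_Activation_Mul_Clamp for the conv + tanh quantization path
// this PR adds. The alias and the scale values here are hypothetical.
//
//   using OutElementOp = Mul_Activation_Mul_Clamp<TanH>;
//   const float Sacc = Sw * Sx;     // accumulator scale
//   const float Sz   = 1.f / 128.f; // output scale, tanh range is [-1, 1]
//   OutElementOp out_element_op{1.f / Sz, Sacc, TanH{}};
//
//   int8_t qz;
//   out_element_op(qz, acc); // acc is the int32 Qw @ Qx accumulator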

} // namespace element_wise
} // namespace tensor_operation
} // namespace ck