Add name member to CK elementwise operations. (#3102)

This commit is contained in:
Ville Pietilä
2025-10-28 07:19:29 +02:00
committed by GitHub
parent 54746e9329
commit 1c17bae816
5 changed files with 153 additions and 0 deletions

View File

@@ -96,6 +96,8 @@ struct Add
struct Max
{
static constexpr const char* name = "Max";
template <typename Y, typename X0, typename X1>
__host__ __device__ void operator()(Y& y, const X0& x0, const X1& x1) const
{
@@ -107,6 +109,8 @@ struct Max
struct Min
{
static constexpr const char* name = "Min";
template <typename Y, typename X0, typename X1>
__host__ __device__ void operator()(Y& y, const X0& x0, const X1& x1) const
{
@@ -118,6 +122,8 @@ struct Min
struct Multiply
{
static constexpr const char* name = "Multiply";
template <typename Y, typename X0, typename X1>
__host__ __device__ constexpr void operator()(Y& y, const X0& x0, const X1& x1) const;
@@ -210,6 +216,8 @@ struct Multiply
struct ScaleAdd
{
static constexpr const char* name = "ScaleAdd";
__host__ __device__ ScaleAdd(float scale = 1.f) : scale_(scale) {}
template <typename Y, typename X0, typename X1>
@@ -237,6 +245,8 @@ struct ScaleAdd
struct Subtract
{
static constexpr const char* name = "Subtract";
template <typename T>
__host__ __device__ constexpr void operator()(T& y, const T& x0, const T& x1) const;
@@ -531,6 +541,8 @@ struct AddRelu
struct AddHardswish
{
static constexpr const char* name = "AddHardswish";
template <typename T>
__host__ __device__ constexpr void operator()(T& y, const T& x0, const T& x1) const;
@@ -568,6 +580,8 @@ struct AddHardswish
// E = FastGelu(C + D)
struct AddFastGelu
{
static constexpr const char* name = "AddFastGelu";
template <typename E, typename C, typename D>
__host__ __device__ constexpr void operator()(E& e, const C& c, const D& d) const;
@@ -633,6 +647,8 @@ struct AddFastGelu
// E = MultiplyFastGelu(C + D)
struct MultiplyFastGelu
{
static constexpr const char* name = "MultiplyFastGelu";
template <typename E, typename C, typename D>
__host__ __device__ constexpr void operator()(E& e, const C& c, const D& d) const;
@@ -698,6 +714,8 @@ struct MultiplyFastGelu
// E = Silu(C + D)
struct AddSilu
{
static constexpr const char* name = "AddSilu";
template <typename E, typename C, typename D>
__host__ __device__ constexpr void operator()(E& e, const C& c, const D& d) const;
@@ -748,6 +766,8 @@ struct AddSilu
struct ConvScaleAdd
{
static constexpr const char* name = "ConvScaleAdd";
__host__ __device__ ConvScaleAdd(float scale_in = 1.f,
float scale_wei = 1.f,
float scale_out = 1.f)

View File

@@ -13,6 +13,8 @@ namespace element_wise {
template <typename... UnaryOpsSet>
struct UnaryCombinedOp
{
static constexpr const char* name = "UnaryCombinedOp";
__host__ __device__ UnaryCombinedOp() : unary_ops_() {}
__host__ __device__ UnaryCombinedOp(UnaryOpsSet... unary_ops) : unary_ops_(unary_ops...) {}
@@ -33,6 +35,8 @@ struct UnaryCombinedOp
template <typename BinaryOp, typename UnaryOp0, typename UnaryOp1>
struct BinaryWithUnaryCombinedOp
{
static constexpr const char* name = "BinaryWithUnaryCombinedOp";
__host__ __device__ BinaryWithUnaryCombinedOp() : binary_op_(), unary_op0_(), unary_op1_() {}
__host__ __device__ BinaryWithUnaryCombinedOp(BinaryOp binary_op,
@@ -66,6 +70,8 @@ template <typename BinaryOp0,
typename UnaryOp2>
struct TrinaryWithUnaryCombinedOp
{
static constexpr const char* name = "TrinaryWithUnaryCombinedOp";
__host__ __device__ TrinaryWithUnaryCombinedOp()
: binary_op0_(), binary_op1_(), unary_op0_(), unary_op1_(), unary_op2_()
{

View File

@@ -33,6 +33,8 @@ namespace element_wise {
struct AddReluAdd
{
static constexpr const char* name = "AddReluAdd";
template <typename Y, typename X0, typename X1, typename X2>
__host__ __device__ constexpr void operator()(Y&, const X0&, const X1&, const X2&) const;
@@ -102,6 +104,8 @@ struct AddReluAdd
struct AddHardswishAdd
{
static constexpr const char* name = "AddHardswishAdd";
template <typename Y, typename X0, typename X1, typename X2>
__host__ __device__ constexpr void operator()(Y&, const X0&, const X1&, const X2&) const;
@@ -134,6 +138,8 @@ struct AddHardswishAdd
// E = C + D0 + D1
struct AddAdd
{
static constexpr const char* name = "AddAdd";
template <typename E, typename C, typename D0, typename D1>
__host__ __device__ void operator()(E& e, const C& c, const D0& d0, const D1& d1) const
{
@@ -163,6 +169,8 @@ struct AddAdd
// E = (C + D0) x D1
struct AddMultiply
{
static constexpr const char* name = "AddMultiply";
template <typename E, typename C, typename D0, typename D1>
__host__ __device__ void operator()(E& e, const C& c, const D0& d0, const D1& d1) const;
@@ -199,6 +207,8 @@ struct AddMultiply
// E = C x D0 + D1
struct MultiplyAdd
{
static constexpr const char* name = "MultiplyAdd";
template <typename E, typename C, typename D0, typename D1>
__host__ __device__ void operator()(E& e, const C& c, const D0& d0, const D1& d1) const;
@@ -251,6 +261,8 @@ struct MultiplyAdd
struct MultiplyMultiply
{
static constexpr const char* name = "MultiplyMultiply";
template <typename E, typename C, typename D0, typename D1>
__host__ __device__ constexpr void
operator()(E& e, const C& c, const D0& d0, const D1& d1) const;
@@ -306,6 +318,8 @@ struct MultiplyMultiply
struct MultiplyAddFastGelu
{
static constexpr const char* name = "MultiplyAddFastGelu";
template <typename E, typename C, typename D0, typename D1>
__host__ __device__ constexpr void
operator()(E& e, const C& c, const D0& d0, const D1& d1) const;
@@ -327,6 +341,8 @@ struct MultiplyAddFastGelu
// E = FastGelu(C + D0 + D1)
struct AddAddFastGelu
{
static constexpr const char* name = "AddAddFastGelu";
template <typename E, typename C, typename D0, typename D1>
__host__ __device__ constexpr void
operator()(E& e, const C& c, const D0& d0, const D1& d1) const;
@@ -398,6 +414,7 @@ struct AddAddFastGelu
// E = Relu(alpha1 * C + alpha2 * D0 + D1)
struct ScaleAddScaleAddRelu
{
static constexpr const char* name = "ScaleAddScaleAddRelu";
ScaleAddScaleAddRelu(const float alpha1 = 1.f, const float alpha2 = 1.f)
: alpha1_(alpha1), alpha2_(alpha2)
@@ -462,6 +479,8 @@ struct ScaleAddScaleAddRelu
struct Normalize
{
static constexpr const char* name = "Normalize";
// FIXME: is double absolutely necessary?
Normalize(double epsilon = 1e-4) : epsilon_(epsilon) {}
@@ -533,6 +552,8 @@ struct Normalize
// The data type of mean and variance is used as AccDataType
struct NormalizeInInfer
{
static constexpr const char* name = "NormalizeInInfer";
NormalizeInInfer(double epsilon = 1e-4) : epsilon_(epsilon) {}
template <typename T1, typename T2, typename T3, typename T4>
@@ -622,6 +643,8 @@ struct UnaryTypeConvert;
template <>
struct UnaryTypeConvert<float, ck::bhalf_t>
{
static constexpr const char* name = "UnaryTypeConvert";
__host__ __device__ void operator()(float& y, ck::bhalf_t& x) const
{
y = ck::type_convert<float, ck::bhalf_t>(x);
@@ -631,6 +654,8 @@ struct UnaryTypeConvert<float, ck::bhalf_t>
template <>
struct UnaryTypeConvert<ck::bhalf_t, float>
{
static constexpr const char* name = "UnaryTypeConvert";
__host__ __device__ void operator()(ck::bhalf_t& y, float& x) const
{
y = ck::type_convert<ck::bhalf_t, float>(x);

View File

@@ -24,6 +24,8 @@ namespace element_wise {
template <typename Activation>
struct Activation_Mul_Clamp
{
static constexpr const char* name = "Activation_Mul_Clamp";
// Convolution + Activation (piecewise linear function)
// If an activation is piecewise linear function, then Activation(Sy * Qy) = Sy * Activation(Qy)
// Z = Activation(Y) = Activation(W @ X)
@@ -71,6 +73,8 @@ struct Activation_Mul_Clamp
template <typename Activation>
struct Mul_Activation_Mul_Clamp
{
static constexpr const char* name = "Mul_Activation_Mul_Clamp";
// Convolution + Activation (non piecewise linear function)
// Z = Activation(Y) = Activation(W @ X)
// Sz * Qz = Activation(Sy * Qy)
@@ -101,6 +105,8 @@ struct Mul_Activation_Mul_Clamp
template <typename Activation>
struct Activation_Mul2_Clamp
{
static constexpr const char* name = "Activation_Mul2_Clamp";
Activation_Mul2_Clamp(Activation activationOp) : activationOp_(activationOp) {}
__host__ __device__ constexpr void
@@ -131,6 +137,8 @@ struct Activation_Mul2_Clamp
template <typename Activation>
struct Add_Activation_Mul_Clamp
{
static constexpr const char* name = "Add_Activation_Mul_Clamp";
// Convolution + bias
// Let Bias = B = Sw * Sx * Qb
// Where Qb is int32
@@ -175,6 +183,8 @@ struct Add_Activation_Mul_Clamp
template <typename Activation>
struct Add_Activation_Mul2_Clamp
{
static constexpr const char* name = "Add_Activation_Mul2_Clamp";
Add_Activation_Mul2_Clamp(Activation activationOp) : activationOp_(activationOp) {}
__host__ __device__ constexpr void
@@ -206,6 +216,8 @@ struct Add_Activation_Mul2_Clamp
template <typename Activation>
struct Add_Mul_Activation_Mul_Clamp
{
static constexpr const char* name = "Add_Mul_Activation_Mul_Clamp";
// Convolution + Activation (non piecewise linear function)
// Z = Activation(Y) = Activation(W @ X + B)
// Sz * Qz = Activation(Sy * Qy)
@@ -250,6 +262,8 @@ struct Add_Mul_Activation_Mul_Clamp
template <typename Activation>
struct Add_Mul2_Activation_Mul_Clamp
{
static constexpr const char* name = "Add_Mul2_Activation_Mul_Clamp";
Add_Mul2_Activation_Mul_Clamp(float scale_z_inv, Activation activationOp)
: scale_z_inv_(scale_z_inv), activationOp_(activationOp)
{

View File

@@ -157,6 +157,8 @@ namespace element_wise {
struct PassThroughPack8
{
static constexpr const char* name = "PassThroughPack8";
template <typename Y, typename X>
__host__ __device__ void operator()(Y& y, const X& x) const;
@@ -265,6 +267,8 @@ struct PassThroughPack8
struct DequantPack8
{
static constexpr const char* name = "DequantPack8";
template <typename Y, typename X, typename Z>
__host__ __device__ void operator()(Y& y, const X& x, const Z& z) const;
@@ -301,6 +305,8 @@ struct DequantPack8
struct PassThroughPack2
{
static constexpr const char* name = "PassThroughPack2";
template <typename Y, typename X>
__host__ __device__ void operator()(Y& y, const X& x) const;
@@ -558,6 +564,8 @@ struct PassThrough
struct UnaryConvert
{
static constexpr const char* name = "UnaryConvert";
template <typename Y, typename X>
__host__ __device__ void operator()(Y& y, const X& x) const
{
@@ -567,6 +575,8 @@ struct UnaryConvert
struct ConvertBF16RTN
{
static constexpr const char* name = "ConvertBF16RTN";
// convert to bf16 using round to nearest (rtn)
template <typename Y, typename X>
__host__ __device__ void operator()(Y& y, const X& x) const
@@ -584,6 +594,8 @@ struct ConvertBF16RTN
struct ConvertF8SR
{
static constexpr const char* name = "ConvertF8SR";
// convert to fp8 using stochastic rounding (SR)
template <typename Y, typename X>
__host__ __device__ void operator()(Y& y, const X& x) const
@@ -602,6 +614,8 @@ struct ConvertF8SR
struct ConvertF8RNE
{
static constexpr const char* name = "ConvertF8RNE";
// convert to fp8 using rounding to nearest even
template <typename Y, typename X>
__host__ __device__ void operator()(Y& y, const X& x) const
@@ -667,6 +681,8 @@ struct Scale
struct ScaleAndResetNaNToMinusInfinity
{
static constexpr const char* name = "ScaleAndResetNaNToMinusInfinity";
__host__ __device__ ScaleAndResetNaNToMinusInfinity(float scale) : scale_(scale) {}
template <typename Y, typename X>
@@ -683,6 +699,8 @@ struct ScaleAndResetNaNToMinusInfinity
struct UnaryDivide
{
static constexpr const char* name = "UnaryDivide";
__host__ __device__ UnaryDivide(const int32_t divider = 1) : divider_(divider) {}
template <typename T>
@@ -727,6 +745,8 @@ struct UnaryDivide
struct UnarySquare
{
static constexpr const char* name = "UnarySquare";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -743,6 +763,8 @@ struct UnarySquare
struct UnaryAbs
{
static constexpr const char* name = "UnaryAbs";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -773,6 +795,8 @@ struct UnaryAbs
struct UnarySqrt
{
static constexpr const char* name = "UnarySqrt";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -898,6 +922,8 @@ struct Relu
// gpu code use lower accuracy "_ocml_exp_f32" and "rcp" function
struct FastGelu
{
static constexpr const char* name = "FastGelu";
template <typename Y, typename X>
__host__ void operator()(Y& y, const X& x) const;
@@ -1013,6 +1039,8 @@ struct FastGelu
// y = 0.5*x*(1+erf(x/sqrt(2)))
struct Gelu
{
static constexpr const char* name = "Gelu";
template <typename Y, typename X>
__host__ __device__ void operator()(Y& y, const X& x) const;
@@ -1031,6 +1059,8 @@ struct Gelu
struct Sigmoid
{
static constexpr const char* name = "Sigmoid";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1055,6 +1085,8 @@ struct Sigmoid
struct Silu
{
static constexpr const char* name = "SiLU";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1068,6 +1100,8 @@ struct Silu
struct TanH
{
static constexpr const char* name = "TanH";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1091,6 +1125,8 @@ struct TanH
struct ACos
{
static constexpr const char* name = "ACos";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1105,6 +1141,8 @@ struct ACos
struct Neg
{
static constexpr const char* name = "Neg";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1119,6 +1157,8 @@ struct Neg
struct ATan
{
static constexpr const char* name = "ATan";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1133,6 +1173,8 @@ struct ATan
struct Sin
{
static constexpr const char* name = "Sin";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1147,6 +1189,8 @@ struct Sin
struct ASinH
{
static constexpr const char* name = "ASinH";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1161,6 +1205,8 @@ struct ASinH
struct Cos
{
static constexpr const char* name = "Cos";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1175,6 +1221,8 @@ struct Cos
struct ACosH
{
static constexpr const char* name = "ACosH";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1189,6 +1237,8 @@ struct ACosH
struct Tan
{
static constexpr const char* name = "Tan";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1203,6 +1253,8 @@ struct Tan
struct ATanH
{
static constexpr const char* name = "ATanH";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1217,6 +1269,8 @@ struct ATanH
struct SinH
{
static constexpr const char* name = "SinH";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1231,6 +1285,8 @@ struct SinH
struct Ceil
{
static constexpr const char* name = "Ceil";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1245,6 +1301,8 @@ struct Ceil
struct Exp
{
static constexpr const char* name = "Exp";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1259,6 +1317,8 @@ struct Exp
struct CosH
{
static constexpr const char* name = "CosH";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1273,6 +1333,8 @@ struct CosH
struct Floor
{
static constexpr const char* name = "Floor";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1287,6 +1349,8 @@ struct Floor
struct Log
{
static constexpr const char* name = "Log";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1301,6 +1365,8 @@ struct Log
struct ASin
{
static constexpr const char* name = "ASin";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1315,6 +1381,8 @@ struct ASin
struct Rcp
{
static constexpr const char* name = "Rcp";
template <typename T>
__host__ __device__ void operator()(T& y, const T& x) const
{
@@ -1329,6 +1397,8 @@ struct Rcp
struct Swish
{
static constexpr const char* name = "Swish";
Swish(float beta = 1.0f) : beta_(beta) {}
template <typename Y, typename X>
@@ -1358,6 +1428,8 @@ struct Swish
struct SoftRelu
{
static constexpr const char* name = "SoftRelu";
SoftRelu(float alpha = 1.f) : alpha_(alpha){};
template <typename T>
@@ -1386,6 +1458,8 @@ struct SoftRelu
struct Power
{
static constexpr const char* name = "Power";
Power(float alpha = 0.f, float beta = 1.f, float gamma = 2.f)
: alpha_(alpha), beta_(beta), gamma_(gamma){};
@@ -1420,6 +1494,8 @@ struct Power
struct ClippedRelu
{
static constexpr const char* name = "ClippedRelu";
ClippedRelu(float alpha = 0.f, float beta = 1.f) : alpha_(alpha), beta_(beta){};
template <typename T>
@@ -1449,6 +1525,8 @@ struct ClippedRelu
struct LeakyRelu
{
static constexpr const char* name = "LeakyRelu";
LeakyRelu(float alpha = 0.01f) : alpha_(alpha){};
template <typename T>
@@ -1476,6 +1554,8 @@ struct LeakyRelu
struct Elu
{
static constexpr const char* name = "Elu";
Elu(float alpha = 1.f) : alpha_(alpha){};
template <typename T>
@@ -1503,6 +1583,8 @@ struct Elu
struct Logistic
{
static constexpr const char* name = "Logistic";
Logistic(float alpha = 1.f) : alpha_(alpha){};
template <typename T>
@@ -1531,6 +1613,8 @@ struct Logistic
struct ConvInvscale
{
static constexpr const char* name = "ConvInvscale";
__host__ __device__ ConvInvscale(float scale_in = 1.f,
float scale_wei = 1.f,
float scale_out = 1.f)
@@ -1554,6 +1638,8 @@ struct ConvInvscale
struct ConvScale
{
static constexpr const char* name = "ConvScale";
__host__ __device__ ConvScale(float scale_in = 1.f,
float scale_wei = 1.f,
float scale_out = 1.f)
@@ -1577,6 +1663,8 @@ struct ConvScale
struct ConvScaleRelu
{
static constexpr const char* name = "ConvScaleRelu";
__host__ __device__ ConvScaleRelu(float scale_in = 1.f,
float scale_wei = 1.f,
float scale_out = 1.f)