From f0b6fdcadb9567cab74498070aa1d9b3791f5951 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Pietil=C3=A4?=
 <188998872+vpietila-amd@users.noreply.github.com>
Date: Tue, 28 Oct 2025 07:19:29 +0200
Subject: [PATCH] Add name member to CK elementwise operations. (#3102)

[ROCm/composable_kernel commit: 1c17bae816edc44c32ee9d1a19d79d768fd1be13]
---
 .../element/binary_element_wise_operation.hpp | 20 +++++
 .../combined_element_wise_operation.hpp       |  6 ++
 .../gpu/element/element_wise_operation.hpp    | 25 ++++++
 .../gpu/element/quantization_operation.hpp    | 14 +++
 .../element/unary_element_wise_operation.hpp  | 88 +++++++++++++++++++
 5 files changed, 153 insertions(+)
diff --git a/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp
index 4954144aca..10a9a4dbae 100644
--- a/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp
+++ b/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp
@@ -96,6 +96,8 @@ struct Add
 
 struct Max
 {
+    static constexpr const char* name = "Max";
+
     template <typename Y, typename X0, typename X1>
     __host__ __device__ void operator()(Y& y, const X0& x0, const X1& x1) const
     {
@@ -107,6 +109,8 @@ struct Max
 
 struct Min
 {
+    static constexpr const char* name = "Min";
+
     template <typename Y, typename X0, typename X1>
     __host__ __device__ void operator()(Y& y, const X0& x0, const X1& x1) const
     {
@@ -118,6 +122,8 @@ struct Min
 
 struct Multiply
 {
+    static constexpr const char* name = "Multiply";
+
     template <typename Y, typename X0, typename X1>
     __host__ __device__ constexpr void operator()(Y& y, const X0& x0, const X1& x1) const;
 
@@ -210,6 +216,8 @@ struct Multiply
 
 struct ScaleAdd
 {
+    static constexpr const char* name = "ScaleAdd";
+
     __host__ __device__ ScaleAdd(float scale = 1.f) : scale_(scale) {}
 
     template <typename Y, typename X0, typename X1>
@@ -237,6 +245,8 @@ struct ScaleAdd
 
 struct Subtract
 {
+    static constexpr const char* name = "Subtract";
+
     template <typename T>
     __host__ __device__ constexpr void operator()(T& y, const T& x0, const T& x1) const;
 
@@ -531,6 +541,8 @@ struct AddRelu
 
 struct AddHardswish
 {
+    static constexpr const char* name = "AddHardswish";
+
     template <typename T>
     __host__ __device__ constexpr void operator()(T& y, const T& x0, const T& x1) const;
 
@@ -568,6 +580,8 @@ struct AddHardswish
 // E = FastGelu(C + D)
 struct AddFastGelu
 {
+    static constexpr const char* name = "AddFastGelu";
+
     template <typename E, typename C, typename D>
     __host__ __device__ constexpr void operator()(E& e, const C& c, const D& d) const;
 
@@ -633,6 +647,8 @@ struct AddFastGelu
 // E = MultiplyFastGelu(C + D)
 struct MultiplyFastGelu
 {
+    static constexpr const char* name = "MultiplyFastGelu";
+
     template <typename E, typename C, typename D>
     __host__ __device__ constexpr void operator()(E& e, const C& c, const D& d) const;
 
@@ -698,6 +714,8 @@ struct MultiplyFastGelu
 // E = Silu(C + D)
 struct AddSilu
 {
+    static constexpr const char* name = "AddSilu";
+
     template <typename E, typename C, typename D>
     __host__ __device__ constexpr void operator()(E& e, const C& c, const D& d) const;
 
@@ -748,6 +766,8 @@ struct AddSilu
 
 struct ConvScaleAdd
 {
+    static constexpr const char* name = "ConvScaleAdd";
+
     __host__ __device__ ConvScaleAdd(float scale_in  = 1.f,
                                      float scale_wei = 1.f,
                                      float scale_out = 1.f)
diff --git a/include/ck/tensor_operation/gpu/element/combined_element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/combined_element_wise_operation.hpp
index 3cc1c3c42c..083327b8f0 100644
--- a/include/ck/tensor_operation/gpu/element/combined_element_wise_operation.hpp
+++ b/include/ck/tensor_operation/gpu/element/combined_element_wise_operation.hpp
@@ -13,6 +13,8 @@ namespace element_wise {
 template <typename... UnaryOpsSet>
 struct UnaryCombinedOp
 {
+    static constexpr const char* name = "UnaryCombinedOp";
+
     __host__ __device__ UnaryCombinedOp() : unary_ops_() {}
 
     __host__ __device__ UnaryCombinedOp(UnaryOpsSet... unary_ops) : unary_ops_(unary_ops...) {}
@@ -33,6 +35,8 @@ struct UnaryCombinedOp
 template <typename BinaryOp, typename UnaryOp0, typename UnaryOp1>
 struct BinaryWithUnaryCombinedOp
 {
+    static constexpr const char* name = "BinaryWithUnaryCombinedOp";
+
     __host__ __device__ BinaryWithUnaryCombinedOp() : binary_op_(), unary_op0_(), unary_op1_() {}
 
     __host__ __device__ BinaryWithUnaryCombinedOp(BinaryOp binary_op,
@@ -66,6 +70,8 @@ template <typename BinaryOp0,
           typename UnaryOp2>
 struct TrinaryWithUnaryCombinedOp
 {
+    static constexpr const char* name = "TrinaryWithUnaryCombinedOp";
+
     __host__ __device__ TrinaryWithUnaryCombinedOp()
         : binary_op0_(), binary_op1_(), unary_op0_(), unary_op1_(), unary_op2_()
     {
diff --git a/include/ck/tensor_operation/gpu/element/element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/element_wise_operation.hpp
index 5edcdd257b..709f1bf6aa 100644
--- a/include/ck/tensor_operation/gpu/element/element_wise_operation.hpp
+++ b/include/ck/tensor_operation/gpu/element/element_wise_operation.hpp
@@ -33,6 +33,8 @@ namespace element_wise {
 
 struct AddReluAdd
 {
+    static constexpr const char* name = "AddReluAdd";
+
     template <typename Y, typename X0, typename X1, typename X2>
     __host__ __device__ constexpr void operator()(Y&, const X0&, const X1&, const X2&) const;
 
@@ -102,6 +104,8 @@ struct AddReluAdd
 
 struct AddHardswishAdd
 {
+    static constexpr const char* name = "AddHardswishAdd";
+
     template <typename Y, typename X0, typename X1, typename X2>
     __host__ __device__ constexpr void operator()(Y&, const X0&, const X1&, const X2&) const;
 
@@ -134,6 +138,8 @@ struct AddHardswishAdd
 // E = C + D0 + D1
 struct AddAdd
 {
+    static constexpr const char* name = "AddAdd";
+
     template <typename E, typename C, typename D0, typename D1>
     __host__ __device__ void operator()(E& e, const C& c, const D0& d0, const D1& d1) const
     {
@@ -163,6 +169,8 @@ struct AddAdd
 // E = (C + D0) x D1
 struct AddMultiply
 {
+    static constexpr const char* name = "AddMultiply";
+
     template <typename E, typename C, typename D0, typename D1>
     __host__ __device__ void operator()(E& e, const C& c, const D0& d0, const D1& d1) const;
 
@@ -199,6 +207,8 @@ struct AddMultiply
 // E = C x D0 + D1
 struct MultiplyAdd
 {
+    static constexpr const char* name = "MultiplyAdd";
+
     template <typename E, typename C, typename D0, typename D1>
     __host__ __device__ void operator()(E& e, const C& c, const D0& d0, const D1& d1) const;
 
@@ -251,6 +261,8 @@ struct MultiplyAdd
 
 struct MultiplyMultiply
 {
+    static constexpr const char* name = "MultiplyMultiply";
+
     template <typename E, typename C, typename D0, typename D1>
     __host__ __device__ constexpr void
     operator()(E& e, const C& c, const D0& d0, const D1& d1) const;
@@ -306,6 +318,8 @@ struct MultiplyMultiply
 
 struct MultiplyAddFastGelu
 {
+    static constexpr const char* name = "MultiplyAddFastGelu";
+
     template <typename E, typename C, typename D0, typename D1>
     __host__ __device__ constexpr void
     operator()(E& e, const C& c, const D0& d0, const D1& d1) const;
@@ -327,6 +341,8 @@ struct MultiplyAddFastGelu
 // E = FastGelu(C + D0 + D1)
 struct AddAddFastGelu
 {
+    static constexpr const char* name = "AddAddFastGelu";
+
     template <typename E, typename C, typename D0, typename D1>
     __host__ __device__ constexpr void
     operator()(E& e, const C& c, const D0& d0, const D1& d1) const;
@@ -398,6 +414,7 @@ struct AddAddFastGelu
 // E = Relu(alpha1 * C + alpha2 * D0 + D1)
 struct ScaleAddScaleAddRelu
 {
+    static constexpr const char* name = "ScaleAddScaleAddRelu";
 
     ScaleAddScaleAddRelu(const float alpha1 = 1.f, const float alpha2 = 1.f)
         : alpha1_(alpha1), alpha2_(alpha2)
@@ -462,6 +479,8 @@ struct ScaleAddScaleAddRelu
 
 struct Normalize
 {
+    static constexpr const char* name = "Normalize";
+
     // FIXME: is double absolutely necessary?
     Normalize(double epsilon = 1e-4) : epsilon_(epsilon) {}
 
@@ -533,6 +552,8 @@ struct Normalize
 // The data type of mean and variance is used as AccDataType
 struct NormalizeInInfer
 {
+    static constexpr const char* name = "NormalizeInInfer";
+
     NormalizeInInfer(double epsilon = 1e-4) : epsilon_(epsilon) {}
 
     template <typename T1, typename T2, typename T3, typename T4>
@@ -622,6 +643,8 @@ struct UnaryTypeConvert;
 template <>
 struct UnaryTypeConvert<float, ck::bhalf_t>
 {
+    static constexpr const char* name = "UnaryTypeConvert";
+
     __host__ __device__ void operator()(float& y, ck::bhalf_t& x) const
     {
         y = ck::type_convert<float, ck::bhalf_t>(x);
@@ -631,6 +654,8 @@ struct UnaryTypeConvert<float, ck::bhalf_t>
 template <>
 struct UnaryTypeConvert<ck::bhalf_t, float>
 {
+    static constexpr const char* name = "UnaryTypeConvert";
+
     __host__ __device__ void operator()(ck::bhalf_t& y, float& x) const
     {
         y = ck::type_convert<ck::bhalf_t, float>(x);
diff --git a/include/ck/tensor_operation/gpu/element/quantization_operation.hpp b/include/ck/tensor_operation/gpu/element/quantization_operation.hpp
index fefa6c793f..a5cbfbb2fc 100644
--- a/include/ck/tensor_operation/gpu/element/quantization_operation.hpp
+++ b/include/ck/tensor_operation/gpu/element/quantization_operation.hpp
@@ -24,6 +24,8 @@ namespace element_wise {
 template <typename Activation>
 struct Activation_Mul_Clamp
 {
+    static constexpr const char* name = "Activation_Mul_Clamp";
+
     // Convolution + Activation (piecewise linear function)
     // If an activation is piecewise linear function, then Activation(Sy * Qy) = Sy * Activation(Qy)
     // Z = Activation(Y) = Activation(W @ X)
@@ -71,6 +73,8 @@ struct Activation_Mul_Clamp
 template <typename Activation>
 struct Mul_Activation_Mul_Clamp
 {
+    static constexpr const char* name = "Mul_Activation_Mul_Clamp";
+
     // Convolution + Activation (non piecewise linear function)
     // Z = Activation(Y) = Activation(W @ X)
     // Sz * Qz = Activation(Sy * Qy)
@@ -101,6 +105,8 @@ struct Mul_Activation_Mul_Clamp
 template <typename Activation>
 struct Activation_Mul2_Clamp
 {
+    static constexpr const char* name = "Activation_Mul2_Clamp";
+
     Activation_Mul2_Clamp(Activation activationOp) : activationOp_(activationOp) {}
 
     __host__ __device__ constexpr void
@@ -131,6 +137,8 @@ struct Activation_Mul2_Clamp
 template <typename Activation>
 struct Add_Activation_Mul_Clamp
 {
+    static constexpr const char* name = "Add_Activation_Mul_Clamp";
+
     // Convolution + bias
     // Let Bias = B = Sw * Sx * Qb
     // Where Qb is int32
@@ -175,6 +183,8 @@ struct Add_Activation_Mul_Clamp
 template <typename Activation>
 struct Add_Activation_Mul2_Clamp
 {
+    static constexpr const char* name = "Add_Activation_Mul2_Clamp";
+
     Add_Activation_Mul2_Clamp(Activation activationOp) : activationOp_(activationOp) {}
 
     __host__ __device__ constexpr void
@@ -206,6 +216,8 @@ struct Add_Activation_Mul2_Clamp
 template <typename Activation>
 struct Add_Mul_Activation_Mul_Clamp
 {
+    static constexpr const char* name = "Add_Mul_Activation_Mul_Clamp";
+
     // Convolution + Activation (non piecewise linear function)
     // Z = Activation(Y) = Activation(W @ X + B)
     // Sz * Qz = Activation(Sy * Qy)
@@ -250,6 +262,8 @@ struct Add_Mul_Activation_Mul_Clamp
 template <typename Activation>
 struct Add_Mul2_Activation_Mul_Clamp
 {
+    static constexpr const char* name = "Add_Mul2_Activation_Mul_Clamp";
+
     Add_Mul2_Activation_Mul_Clamp(float scale_z_inv, Activation activationOp)
         : scale_z_inv_(scale_z_inv), activationOp_(activationOp)
     {
diff --git a/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
index 59292b30e2..a0b33afb4d 100644
--- a/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
+++ b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
@@ -157,6 +157,8 @@ namespace element_wise {
 
 struct PassThroughPack8
 {
+    static constexpr const char* name = "PassThroughPack8";
+
     template <typename Y, typename X>
     __host__ __device__ void operator()(Y& y, const X& x) const;
 
@@ -265,6 +267,8 @@ struct PassThroughPack8
 
 struct DequantPack8
 {
+    static constexpr const char* name = "DequantPack8";
+
     template <typename Y, typename X, typename Z>
     __host__ __device__ void operator()(Y& y, const X& x, const Z& z) const;
 
@@ -301,6 +305,8 @@ struct DequantPack8
 
 struct PassThroughPack2
 {
+    static constexpr const char* name = "PassThroughPack2";
+
     template <typename Y, typename X>
     __host__ __device__ void operator()(Y& y, const X& x) const;
 
@@ -558,6 +564,8 @@ struct PassThrough
 
 struct UnaryConvert
 {
+    static constexpr const char* name = "UnaryConvert";
+
     template <typename Y, typename X>
     __host__ __device__ void operator()(Y& y, const X& x) const
     {
@@ -567,6 +575,8 @@ struct UnaryConvert
 
 struct ConvertBF16RTN
 {
+    static constexpr const char* name = "ConvertBF16RTN";
+
     // convert to bf16 using round to nearest (rtn)
     template <typename Y, typename X>
     __host__ __device__ void operator()(Y& y, const X& x) const
@@ -584,6 +594,8 @@ struct ConvertBF16RTN
 
 struct ConvertF8SR
 {
+    static constexpr const char* name = "ConvertF8SR";
+
     // convert to fp8 using stochastic rounding (SR)
     template <typename Y, typename X>
     __host__ __device__ void operator()(Y& y, const X& x) const
@@ -602,6 +614,8 @@ struct ConvertF8SR
 
 struct ConvertF8RNE
 {
+    static constexpr const char* name = "ConvertF8RNE";
+
     // convert to fp8 using rounding to nearest even
     template <typename Y, typename X>
     __host__ __device__ void operator()(Y& y, const X& x) const
@@ -667,6 +681,8 @@ struct Scale
 
 struct ScaleAndResetNaNToMinusInfinity
 {
+    static constexpr const char* name = "ScaleAndResetNaNToMinusInfinity";
+
     __host__ __device__ ScaleAndResetNaNToMinusInfinity(float scale) : scale_(scale) {}
 
     template <typename Y, typename X>
@@ -683,6 +699,8 @@ struct ScaleAndResetNaNToMinusInfinity
 
 struct UnaryDivide
 {
+    static constexpr const char* name = "UnaryDivide";
+
     __host__ __device__ UnaryDivide(const int32_t divider = 1) : divider_(divider) {}
 
     template <typename T>
@@ -727,6 +745,8 @@ struct UnaryDivide
 
 struct UnarySquare
 {
+    static constexpr const char* name = "UnarySquare";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -743,6 +763,8 @@ struct UnarySquare
 
 struct UnaryAbs
 {
+    static constexpr const char* name = "UnaryAbs";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -773,6 +795,8 @@ struct UnaryAbs
 
 struct UnarySqrt
 {
+    static constexpr const char* name = "UnarySqrt";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -898,6 +922,8 @@ struct Relu
 // gpu code use lower accuracy "_ocml_exp_f32" and "rcp" function
 struct FastGelu
 {
+    static constexpr const char* name = "FastGelu";
+
     template <typename Y, typename X>
     __host__ void operator()(Y& y, const X& x) const;
 
@@ -1013,6 +1039,8 @@ struct FastGelu
 // y = 0.5*x*(1+erf(x/sqrt(2)))
 struct Gelu
 {
+    static constexpr const char* name = "Gelu";
+
     template <typename Y, typename X>
     __host__ __device__ void operator()(Y& y, const X& x) const;
 
@@ -1031,6 +1059,8 @@ struct Gelu
 
 struct Sigmoid
 {
+    static constexpr const char* name = "Sigmoid";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1055,6 +1085,8 @@ struct Sigmoid
 
 struct Silu
 {
+    static constexpr const char* name = "SiLU";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1068,6 +1100,8 @@ struct Silu
 
 struct TanH
 {
+    static constexpr const char* name = "TanH";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1091,6 +1125,8 @@ struct TanH
 
 struct ACos
 {
+    static constexpr const char* name = "ACos";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1105,6 +1141,8 @@ struct ACos
 
 struct Neg
 {
+    static constexpr const char* name = "Neg";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1119,6 +1157,8 @@ struct Neg
 
 struct ATan
 {
+    static constexpr const char* name = "ATan";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1133,6 +1173,8 @@ struct ATan
 
 struct Sin
 {
+    static constexpr const char* name = "Sin";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1147,6 +1189,8 @@ struct Sin
 
 struct ASinH
 {
+    static constexpr const char* name = "ASinH";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1161,6 +1205,8 @@ struct ASinH
 
 struct Cos
 {
+    static constexpr const char* name = "Cos";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1175,6 +1221,8 @@ struct Cos
 
 struct ACosH
 {
+    static constexpr const char* name = "ACosH";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1189,6 +1237,8 @@ struct ACosH
 
 struct Tan
 {
+    static constexpr const char* name = "Tan";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1203,6 +1253,8 @@ struct Tan
 
 struct ATanH
 {
+    static constexpr const char* name = "ATanH";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1217,6 +1269,8 @@ struct ATanH
 
 struct SinH
 {
+    static constexpr const char* name = "SinH";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1231,6 +1285,8 @@ struct SinH
 
 struct Ceil
 {
+    static constexpr const char* name = "Ceil";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1245,6 +1301,8 @@ struct Ceil
 
 struct Exp
 {
+    static constexpr const char* name = "Exp";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1259,6 +1317,8 @@ struct Exp
 
 struct CosH
 {
+    static constexpr const char* name = "CosH";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1273,6 +1333,8 @@ struct CosH
 
 struct Floor
 {
+    static constexpr const char* name = "Floor";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1287,6 +1349,8 @@ struct Floor
 
 struct Log
 {
+    static constexpr const char* name = "Log";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1301,6 +1365,8 @@ struct Log
 
 struct ASin
 {
+    static constexpr const char* name = "ASin";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1315,6 +1381,8 @@ struct ASin
 
 struct Rcp
 {
+    static constexpr const char* name = "Rcp";
+
     template <typename T>
     __host__ __device__ void operator()(T& y, const T& x) const
     {
@@ -1329,6 +1397,8 @@ struct Rcp
 
 struct Swish
 {
+    static constexpr const char* name = "Swish";
+
     Swish(float beta = 1.0f) : beta_(beta) {}
 
     template <typename Y, typename X>
@@ -1358,6 +1428,8 @@ struct Swish
 
 struct SoftRelu
 {
+    static constexpr const char* name = "SoftRelu";
+
     SoftRelu(float alpha = 1.f) : alpha_(alpha){};
 
     template <typename T>
@@ -1386,6 +1458,8 @@ struct SoftRelu
 
 struct Power
 {
+    static constexpr const char* name = "Power";
+
     Power(float alpha = 0.f, float beta = 1.f, float gamma = 2.f)
         : alpha_(alpha), beta_(beta), gamma_(gamma){};
 
@@ -1420,6 +1494,8 @@ struct Power
 
 struct ClippedRelu
 {
+    static constexpr const char* name = "ClippedRelu";
+
     ClippedRelu(float alpha = 0.f, float beta = 1.f) : alpha_(alpha), beta_(beta){};
 
     template <typename T>
@@ -1449,6 +1525,8 @@ struct ClippedRelu
 
 struct LeakyRelu
 {
+    static constexpr const char* name = "LeakyRelu";
+
     LeakyRelu(float alpha = 0.01f) : alpha_(alpha){};
 
     template <typename T>
@@ -1476,6 +1554,8 @@ struct LeakyRelu
 
 struct Elu
 {
+    static constexpr const char* name = "Elu";
+
     Elu(float alpha = 1.f) : alpha_(alpha){};
 
     template <typename T>
@@ -1503,6 +1583,8 @@ struct Elu
 
 struct Logistic
 {
+    static constexpr const char* name = "Logistic";
+
     Logistic(float alpha = 1.f) : alpha_(alpha){};
 
     template <typename T>
@@ -1531,6 +1613,8 @@ struct Logistic
 
 struct ConvInvscale
 {
+    static constexpr const char* name = "ConvInvscale";
+
     __host__ __device__ ConvInvscale(float scale_in  = 1.f,
                                      float scale_wei = 1.f,
                                      float scale_out = 1.f)
@@ -1554,6 +1638,8 @@ struct ConvInvscale
 
 struct ConvScale
 {
+    static constexpr const char* name = "ConvScale";
+
     __host__ __device__ ConvScale(float scale_in  = 1.f,
                                   float scale_wei = 1.f,
                                   float scale_out = 1.f)
@@ -1577,6 +1663,8 @@ struct ConvScale
 
 struct ConvScaleRelu
 {
+    static constexpr const char* name = "ConvScaleRelu";
+
     __host__ __device__ ConvScaleRelu(float scale_in  = 1.f,
                                       float scale_wei = 1.f,
                                       float scale_out = 1.f)