Batchnorm-forward and Batchnorm-infer Implemented using generic kernels (#320)

* Implement multiple-reduction in one kernel (kernels, device ops, examples) * Add generic elementwise kernel and device interface * Add generator for normal-distributed data initialization * Add host refer implementation of batchnorm-forward and batchnorm-infer * Add examples for implementing batchnorm-forward and batchnorm-infer using generic kernels * Remove un-needed including in batchnorm example * Renaming generic_elementwise to elementiwise in kernel and device classes/functions * Change in gemm_layernorm examples to use DeviceElementwise instead of Device5AryElementwise * Change in exampe 19_binary_elementwise to use DeviceElementwise instead of DeviceBinaryElementwise * Change in device_cgemm_4gemm_xdl_cshuffle.hpp to use kernel_elementwise instead of kernel_binary_elementwise * Add DeviceElementwiseBase and use it in device_normalize_instance.cpp * Removing and renaming files * Update to synchronize gemm_layernorm client example to the generic element-wise device op API * Update to synchronize with the latest headers directory and HostTensorDescriptor interface renaming * Merge two static member functions in device_elementwise.hpp * Remove unary_elementwise_1d kernel and device
2026-04-20 06:49:15 +00:00 · 2022-08-15 23:11:02 +08:00
parent 5ee304595c
commit 53ea4713af
47 changed files with 5195 additions and 1707 deletions
--- a/example/34_batchnorm/batchnorm_common.hpp
+++ b/example/34_batchnorm/batchnorm_common.hpp
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
+#pragma once
+
+#include <cassert>
+#include <vector>
+#include <array>
+#include <type_traits>
+
+#include "ck/utility/data_type.hpp"
+
+// binary operation used to calculate invVariance from mean and meansquare
+struct InvVariance
+{
+    InvVariance(double epsilon) : epsilon_(epsilon){};
+
+    template <typename T>
+    __host__ __device__ constexpr void operator()(T& y, const T& mean, const T& meansquare) const
+    {
+        static_assert(std::is_same<T, float>::value || std::is_same<T, double>::value,
+                      "Data type is not supported by this operation!");
+
+        using ck::type_convert;
+        using ck::math::sqrt;
+
+        T tmp_epsilon = type_convert<T>(epsilon_);
+
+        y = meansquare - mean * mean;
+        y = 1.0f / sqrt(tmp_epsilon + y);
+    };
+
+    double epsilon_;
+};
+
+// (4-in, 2-out) element-wise operation used to update the moving average of mean and variance
+struct MovingAverage
+{
+    MovingAverage(double factor) : factor_(factor){};
+
+    template <typename T>
+    __host__ __device__ constexpr void operator()(T& y0,
+                                                  T& y1,
+                                                  const T& mean,
+                                                  const T& runningMean,
+                                                  const T& meansquare,
+                                                  const T& runningVariance) const
+    {
+        static_assert(std::is_same<T, float>::value || std::is_same<T, double>::value,
+                      "Data type is not supported by this operation!");
+
+        using ck::type_convert;
+
+        T tmp_factor = type_convert<T>(factor_);
+        T variance   = meansquare - mean * mean;
+
+        y0 = runningMean * (type_convert<T>(1.0f) - tmp_factor) + mean * tmp_factor;
+        y1 = runningVariance * (type_convert<T>(1.0f) - tmp_factor) + variance * tmp_factor;
+    };
+
+    double factor_;
+};
+
+struct MovingAverageAndInvVariance
+{
+    MovingAverageAndInvVariance(double epsilon, double factor)
+        : epsilon_(epsilon), factor_(factor){};
+
+    template <typename T>
+    __host__ __device__ constexpr void operator()(T& y0, // resultRunningMean
+                                                  T& y1, // resultRunningVariance
+                                                  T& y2, // saveInvVariance
+                                                  const T& mean,
+                                                  const T& runningMean,
+                                                  const T& meansquare,
+                                                  const T& runningVariance) const
+    {
+        static_assert(std::is_same<T, float>::value || std::is_same<T, double>::value,
+                      "Data type is not supported by this operation!");
+
+        using ck::type_convert;
+        using ck::math::sqrt;
+
+        T tmp_epsilon = type_convert<T>(epsilon_);
+        T tmp_factor  = type_convert<T>(factor_);
+        T variance    = meansquare - mean * mean;
+
+        y0 = runningMean * (type_convert<T>(1.0f) - tmp_factor) + mean * tmp_factor;
+        y1 = runningVariance * (type_convert<T>(1.0f) - tmp_factor) + variance * tmp_factor;
+
+        y2 = 1.0f / sqrt(tmp_epsilon + variance);
+    };
+
+    double epsilon_;
+    double factor_;
+};
+
+struct NormalizeInInfer
+{
+    NormalizeInInfer(double epsilon = 1e-4) : epsilon_(epsilon) {}
+
+    template <typename T1, typename T2>
+    __host__ __device__ constexpr void operator()(T1& y,
+                                                  const T1& x,
+                                                  const T2& mean,
+                                                  const T2& variance,
+                                                  const T2& gamma,
+                                                  const T2& beta) const
+    {
+        static_assert(std::is_same<T2, float>::value || std::is_same<T2, double>::value,
+                      "Data type is not supported by this operation!");
+
+        using ck::type_convert;
+        using ck::math::sqrt;
+
+        T2 tmp_x, tmp_y;
+
+        tmp_x = type_convert<T2>(x);
+
+        tmp_y = ((tmp_x - mean) / sqrt(variance + type_convert<T2>(epsilon_))) * gamma + beta;
+        y     = type_convert<T1>(tmp_y);
+    };
+
+    double epsilon_;
+};
+
+struct NormalizeInForward
+{
+    NormalizeInForward(double epsilon = 1e-4) : epsilon_(epsilon) {}
+
+    template <typename T1, typename T2>
+    __host__ __device__ constexpr void operator()(T1& y,
+                                                  const T1& x,
+                                                  const T2& mean,
+                                                  const T2& meansquare,
+                                                  const T2& gamma,
+                                                  const T2& beta) const
+    {
+        static_assert(std::is_same<T2, float>::value || std::is_same<T2, double>::value,
+                      "Data type is not supported by this operation!");
+
+        using ck::type_convert;
+        using ck::math::sqrt;
+
+        T2 tmp_x, tmp_y;
+        T2 variance = meansquare - mean * mean;
+
+        tmp_x = type_convert<T2>(x);
+
+        tmp_y = ((tmp_x - mean) / sqrt(variance + type_convert<T2>(epsilon_))) * gamma + beta;
+        y     = type_convert<T1>(tmp_y);
+    };
+
+    double epsilon_;
+};
+
+template <int Rank, int NumReduceDim>
+static inline std::array<int, Rank - NumReduceDim>
+get_invariant_dims(const std::array<int, NumReduceDim>& reduceDims)
+{
+    int reduceFlag = 0;
+
+    // flag the bits for the reduceDims
+    for(int i = 0; i < NumReduceDim; i++)
+    {
+        reduceFlag |= 1 << reduceDims[i];
+    };
+
+    std::array<int, Rank - NumReduceDim> invariantDims;
+
+    // collect invariant dimensions
+    int dim = 0;
+    for(int i = 0; i < Rank; i++)
+        if((reduceFlag & (1 << i)) == 0)
+        {
+            invariantDims[dim] = i;
+            dim++;
+        };
+
+    return invariantDims;
+};