mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 05:01:25 +00:00
Layernorm4d (#1022)
* Rename folder * Add layernorm 4d fwd example * Rename original layernorm example * Add layernorm 4d f16 test * Add layernorm4d_fwd client example * Support layernorm4D in ckProfiler * Rename groupnorm to groupnorm fwd in example * Rename layernorm and group fwd in test * Rename normalization to normalization_fwd (instances) * Add fwd to DeviceNormalization * Rename external api header * Rename folder, because we can also add bwd in this folder * Add fwd in layernorm and groupnorm (profiler * Fix compile error --------- Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com>
This commit is contained in:
@@ -19,7 +19,7 @@ template <typename XDataType,
|
||||
typename YElementwiseOperation,
|
||||
index_t Rank,
|
||||
index_t NumReduceDim>
|
||||
struct DeviceNormalization : public BaseOperator
|
||||
struct DeviceNormalizationFwd : public BaseOperator
|
||||
{
|
||||
virtual std::unique_ptr<BaseArgument>
|
||||
MakeArgumentPointer(const std::vector<index_t> lengths,
|
||||
@@ -50,14 +50,14 @@ template <typename XDataType,
|
||||
typename YElementwiseOperation,
|
||||
index_t Rank,
|
||||
index_t NumReduceDim>
|
||||
using DeviceNormalizationPtr = std::unique_ptr<DeviceNormalization<XDataType,
|
||||
GammaDataType,
|
||||
BetaDataType,
|
||||
YDataType,
|
||||
SaveMeanInvStdDataType,
|
||||
YElementwiseOperation,
|
||||
Rank,
|
||||
NumReduceDim>>;
|
||||
using DeviceNormalizationFwdPtr = std::unique_ptr<DeviceNormalizationFwd<XDataType,
|
||||
GammaDataType,
|
||||
BetaDataType,
|
||||
YDataType,
|
||||
SaveMeanInvStdDataType,
|
||||
YElementwiseOperation,
|
||||
Rank,
|
||||
NumReduceDim>>;
|
||||
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
@@ -7,7 +7,7 @@
|
||||
#include <sstream>
|
||||
|
||||
#include "ck/utility/reduction_operator.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/device_normalization.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/device_normalization_fwd.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/device_reduce.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_reduce_common.hpp"
|
||||
#include "ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_selector.hpp"
|
||||
@@ -46,14 +46,14 @@ template <typename XDataType,
|
||||
index_t YDstVectorSize,
|
||||
index_t SaveMeanInvStdDstVectorSize,
|
||||
bool UseWelford = true>
|
||||
struct DeviceNormalizationImpl : public DeviceNormalization<XDataType,
|
||||
GammaDataType,
|
||||
BetaDataType,
|
||||
YDataType,
|
||||
SaveMeanInvStdDataType,
|
||||
YElementwiseOperation,
|
||||
Rank,
|
||||
NumReduceDim>
|
||||
struct DeviceNormalizationFwdImpl : public DeviceNormalizationFwd<XDataType,
|
||||
GammaDataType,
|
||||
BetaDataType,
|
||||
YDataType,
|
||||
SaveMeanInvStdDataType,
|
||||
YElementwiseOperation,
|
||||
Rank,
|
||||
NumReduceDim>
|
||||
{
|
||||
static_assert(BlockSize == MThreadClusterSize * KThreadClusterSize);
|
||||
static_assert(
|
||||
@@ -461,7 +461,7 @@ struct DeviceNormalizationImpl : public DeviceNormalization<XDataType,
|
||||
auto str = std::stringstream();
|
||||
|
||||
// clang-format off
|
||||
str << "DeviceNormalizationImpl<" << BlockSize << ",";
|
||||
str << "DeviceNormalizationFwdImpl<" << BlockSize << ",";
|
||||
str << "Cluster_MK_" << MThreadClusterSize << "_" << KThreadClusterSize << ",";
|
||||
str << "Slice_MK_" << MThreadSliceSize << "_" << KThreadSliceSize << ",";
|
||||
str << "XYSrcVectorDim_" << XYSrcVectorDim << ",";
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
#include "ck/utility/reduction_operator.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/matrix_padder.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/device_normalization.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/device_normalization_fwd.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/device_reduce.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_reduce_common.hpp"
|
||||
#include "ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_1st.hpp"
|
||||
@@ -134,14 +134,14 @@ template <typename XDataType,
|
||||
index_t BetaSrcVectorSize,
|
||||
index_t YDstVectorSize,
|
||||
index_t SaveMeanInvStdDstVectorSize>
|
||||
struct DeviceNormalizationSplitKImpl : public DeviceNormalization<XDataType,
|
||||
GammaDataType,
|
||||
BetaDataType,
|
||||
YDataType,
|
||||
SaveMeanInvStdDataType,
|
||||
YElementwiseOperation,
|
||||
Rank,
|
||||
NumReduceDim>
|
||||
struct DeviceNormalizationFwdSplitKImpl : public DeviceNormalizationFwd<XDataType,
|
||||
GammaDataType,
|
||||
BetaDataType,
|
||||
YDataType,
|
||||
SaveMeanInvStdDataType,
|
||||
YElementwiseOperation,
|
||||
Rank,
|
||||
NumReduceDim>
|
||||
{
|
||||
using WorkspaceMeanVarDataType = SaveMeanInvStdDataType;
|
||||
|
||||
@@ -732,7 +732,7 @@ struct DeviceNormalizationSplitKImpl : public DeviceNormalization<XDataType,
|
||||
auto str = std::stringstream();
|
||||
|
||||
// clang-format off
|
||||
str << "DeviceNormalizationSplitKImpl<" << BlockSize << ",";
|
||||
str << "DeviceNormalizationFwdSplitKImpl<" << BlockSize << ",";
|
||||
str << "Cluster_MK_" << MThreadClusterSize << "_" << KThreadClusterSize << ",";
|
||||
str << "Slice_MK_" << MThreadSliceSize << "_" << KThreadSliceSize << ",";
|
||||
str << "XYSrcVectorDim_" << XYVectorDim << ",";
|
||||
Reference in New Issue
Block a user