Layernorm4d (#1022)

* Rename folder

* Add layernorm 4d fwd example

* Rename original layernorm example

* Add layernorm 4d f16 test

* Add layernorm4d_fwd client example

* Support layernorm4D in ckProfiler

* Rename groupnorm to groupnorm fwd in example

* Rename layernorm and groupnorm fwd in test

* Rename normalization to normalization_fwd (instances)

* Add fwd to DeviceNormalization

* Rename external api header

* Rename folder, because we can also add bwd in this folder

* Add fwd in layernorm and groupnorm (profiler)

* Fix compile error

---------

Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com>

[ROCm/composable_kernel commit: a3d9a2cd42]
This commit is contained in:
rocking
2023-11-09 08:34:51 +08:00
committed by GitHub
parent e857da2f34
commit b64f30e733
59 changed files with 1271 additions and 675 deletions

View File

@@ -7,7 +7,7 @@
#include "ck/ck.hpp"
#include "ck/library/tensor_operation_instance/gpu/normalization.hpp"
#include "ck/library/tensor_operation_instance/gpu/normalization_fwd.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
@@ -88,14 +88,14 @@ bool profile_groupnorm_impl(int do_verification,
beta_dev.ToDevice(beta.mData.data());
// add device normalization instances
using DeviceOp = ck::tensor_operation::device::DeviceNormalization<XDataType,
GammaDataType,
BetaDataType,
YDataType,
SaveMeanInvStdDataType,
PassThrough,
5,
3>;
using DeviceOp = ck::tensor_operation::device::DeviceNormalizationFwd<XDataType,
GammaDataType,
BetaDataType,
YDataType,
SaveMeanInvStdDataType,
PassThrough,
5,
3>;
// get device op instances
const auto instance_ptrs =

View File

@@ -6,7 +6,7 @@
#include <iomanip>
#include "ck/ck.hpp"
#include "ck/library/tensor_operation_instance/gpu/normalization.hpp"
#include "ck/library/tensor_operation_instance/gpu/normalization_fwd.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
@@ -94,14 +94,14 @@ bool profile_layernorm_impl(int do_verification,
constexpr int NumReduceDim = Rank - 1;
// add device normalization instances
using DeviceOp = ck::tensor_operation::device::DeviceNormalization<XDataType,
GammaDataType,
BetaDataType,
YDataType,
SaveMeanInvStdDataType,
PassThrough,
Rank,
NumReduceDim>;
using DeviceOp = ck::tensor_operation::device::DeviceNormalizationFwd<XDataType,
GammaDataType,
BetaDataType,
YDataType,
SaveMeanInvStdDataType,
PassThrough,
Rank,
NumReduceDim>;
// get device op instances
const auto instance_ptrs =