Layernorm4d (#1022)

* Rename folder

* Add layernorm 4d fwd example

* Rename original layernorm example

* Add layernorm 4d f16  test

* Add layernorm4d_fwd client example

* Support layernorm4D in ckProfiler

* Rename groupnorm to groupnorm fwd in example

* Rename layernorm and group fwd in test

* Rename normalization to normalization_fwd (instances)

* Add fwd to DeviceNormalization

* Rename external api header

* Rename folder, because we can also add bwd in this folder

* Add fwd in layernorm and groupnorm (profiler

* Fix compile error

---------

Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com>

[ROCm/composable_kernel commit: a3d9a2cd42]
This commit is contained in:
rocking
2023-11-09 08:34:51 +08:00
committed by GitHub
parent e857da2f34
commit b64f30e733
59 changed files with 1271 additions and 675 deletions

View File

@@ -16,8 +16,8 @@ set(PROFILER_SOURCES
profile_grouped_conv_fwd.cpp
profile_grouped_conv_bwd_weight.cpp
profile_reduce.cpp
profile_groupnorm.cpp
profile_layernorm.cpp
profile_groupnorm_fwd.cpp
profile_layernorm_fwd.cpp
profile_max_pool3d_fwd.cpp
profile_avg_pool3d_bwd.cpp
profile_max_pool3d_bwd.cpp
@@ -77,7 +77,7 @@ target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv2d_bwd_w
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_bwd_weight_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_fwd_bias_relu_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_fwd_bias_relu_add_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_normalization_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_normalization_fwd_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_softmax_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_reduce_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batchnorm_instance)

View File

@@ -6,7 +6,7 @@
#include <unordered_map>
#include "profiler/data_type_enum.hpp"
#include "profiler/profile_groupnorm_impl.hpp"
#include "profiler/profile_groupnorm_fwd_impl.hpp"
#include "profiler_operation_registry.hpp"
using ck::index_t;

View File

@@ -6,7 +6,7 @@
#include <unordered_map>
#include "profiler/data_type_enum.hpp"
#include "profiler/profile_layernorm_impl.hpp"
#include "profiler/profile_layernorm_fwd_impl.hpp"
#include "profiler_operation_registry.hpp"
using ck::index_t;
@@ -76,19 +76,46 @@ int profile_layernorm(int argc, char* argv[])
arg_parser(argc, argv);
const std::vector<index_t> length = arg_parser.long_opts["length"];
using F16 = ck::half_t;
using F32 = float;
constexpr int rank = 2;
using F16 = ck::half_t;
using F32 = float;
if(data_type == ck::DataTypeEnum::Half)
if(length.size() == 2)
{
ck::profiler::profile_layernorm_impl<F16, F16, F16, F32, F16, F32, false, rank>(
do_verification, init_method, do_log, time_kernel, length);
constexpr int rank = 2;
if(data_type == ck::DataTypeEnum::Half)
{
ck::profiler::profile_layernorm_impl<F16, F16, F16, F32, F16, F32, false, rank>(
do_verification, init_method, do_log, time_kernel, length);
}
else if(data_type == ck::DataTypeEnum::Float)
{
ck::profiler::profile_layernorm_impl<F32, F32, F32, F32, F32, F32, false, rank>(
do_verification, init_method, do_log, time_kernel, length);
}
else
{
throw std::runtime_error("not implemented yet");
}
}
else if(data_type == ck::DataTypeEnum::Float)
else if(length.size() == 4)
{
ck::profiler::profile_layernorm_impl<F32, F32, F32, F32, F32, F32, false, rank>(
do_verification, init_method, do_log, time_kernel, length);
constexpr int rank = 4;
if(data_type == ck::DataTypeEnum::Half)
{
ck::profiler::profile_layernorm_impl<F16, F16, F16, F32, F16, F32, false, rank>(
do_verification, init_method, do_log, time_kernel, length);
}
else if(data_type == ck::DataTypeEnum::Float)
{
ck::profiler::profile_layernorm_impl<F32, F32, F32, F32, F32, F32, false, rank>(
do_verification, init_method, do_log, time_kernel, length);
}
else
{
throw std::runtime_error("not implemented yet");
}
}
else
{