mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-19 12:30:16 +00:00
Layernorm4d (#1022)
* Rename folder
* Add layernorm 4d fwd example
* Rename original layernorm example
* Add layernorm 4d f16 test
* Add layernorm4d_fwd client example
* Support layernorm4D in ckProfiler
* Rename groupnorm to groupnorm fwd in example
* Rename layernorm and group fwd in test
* Rename normalization to normalization_fwd (instances)
* Add fwd to DeviceNormalization
* Rename external api header
* Rename folder, because we can also add bwd in this folder
* Add fwd in layernorm and groupnorm (profiler
* Fix compile error
---------
Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com>
[ROCm/composable_kernel commit: a3d9a2cd42]
This commit is contained in:
@@ -16,8 +16,8 @@ set(PROFILER_SOURCES
|
||||
profile_grouped_conv_fwd.cpp
|
||||
profile_grouped_conv_bwd_weight.cpp
|
||||
profile_reduce.cpp
|
||||
profile_groupnorm.cpp
|
||||
profile_layernorm.cpp
|
||||
profile_groupnorm_fwd.cpp
|
||||
profile_layernorm_fwd.cpp
|
||||
profile_max_pool3d_fwd.cpp
|
||||
profile_avg_pool3d_bwd.cpp
|
||||
profile_max_pool3d_bwd.cpp
|
||||
@@ -77,7 +77,7 @@ target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv2d_bwd_w
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_bwd_weight_instance)
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_fwd_bias_relu_instance)
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_fwd_bias_relu_add_instance)
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_normalization_instance)
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_normalization_fwd_instance)
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_softmax_instance)
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_reduce_instance)
|
||||
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batchnorm_instance)
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
#include <unordered_map>
|
||||
|
||||
#include "profiler/data_type_enum.hpp"
|
||||
#include "profiler/profile_groupnorm_impl.hpp"
|
||||
#include "profiler/profile_groupnorm_fwd_impl.hpp"
|
||||
#include "profiler_operation_registry.hpp"
|
||||
|
||||
using ck::index_t;
|
||||
@@ -6,7 +6,7 @@
|
||||
#include <unordered_map>
|
||||
|
||||
#include "profiler/data_type_enum.hpp"
|
||||
#include "profiler/profile_layernorm_impl.hpp"
|
||||
#include "profiler/profile_layernorm_fwd_impl.hpp"
|
||||
#include "profiler_operation_registry.hpp"
|
||||
|
||||
using ck::index_t;
|
||||
@@ -76,19 +76,46 @@ int profile_layernorm(int argc, char* argv[])
|
||||
arg_parser(argc, argv);
|
||||
const std::vector<index_t> length = arg_parser.long_opts["length"];
|
||||
|
||||
using F16 = ck::half_t;
|
||||
using F32 = float;
|
||||
constexpr int rank = 2;
|
||||
using F16 = ck::half_t;
|
||||
using F32 = float;
|
||||
|
||||
if(data_type == ck::DataTypeEnum::Half)
|
||||
if(length.size() == 2)
|
||||
{
|
||||
ck::profiler::profile_layernorm_impl<F16, F16, F16, F32, F16, F32, false, rank>(
|
||||
do_verification, init_method, do_log, time_kernel, length);
|
||||
constexpr int rank = 2;
|
||||
|
||||
if(data_type == ck::DataTypeEnum::Half)
|
||||
{
|
||||
ck::profiler::profile_layernorm_impl<F16, F16, F16, F32, F16, F32, false, rank>(
|
||||
do_verification, init_method, do_log, time_kernel, length);
|
||||
}
|
||||
else if(data_type == ck::DataTypeEnum::Float)
|
||||
{
|
||||
ck::profiler::profile_layernorm_impl<F32, F32, F32, F32, F32, F32, false, rank>(
|
||||
do_verification, init_method, do_log, time_kernel, length);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error("not implemented yet");
|
||||
}
|
||||
}
|
||||
else if(data_type == ck::DataTypeEnum::Float)
|
||||
else if(length.size() == 4)
|
||||
{
|
||||
ck::profiler::profile_layernorm_impl<F32, F32, F32, F32, F32, F32, false, rank>(
|
||||
do_verification, init_method, do_log, time_kernel, length);
|
||||
constexpr int rank = 4;
|
||||
|
||||
if(data_type == ck::DataTypeEnum::Half)
|
||||
{
|
||||
ck::profiler::profile_layernorm_impl<F16, F16, F16, F32, F16, F32, false, rank>(
|
||||
do_verification, init_method, do_log, time_kernel, length);
|
||||
}
|
||||
else if(data_type == ck::DataTypeEnum::Float)
|
||||
{
|
||||
ck::profiler::profile_layernorm_impl<F32, F32, F32, F32, F32, F32, false, rank>(
|
||||
do_verification, init_method, do_log, time_kernel, length);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error("not implemented yet");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Reference in New Issue
Block a user