Layernorm4d (#1022)

* Rename folder

* Add layernorm 4d fwd example

* Rename original layernorm example

* Add layernorm 4d f16  test

* Add layernorm4d_fwd client example

* Support layernorm4D in ckProfiler

* Rename groupnorm to groupnorm fwd in example

* Rename layernorm and group fwd in test

* Rename normalization to normalization_fwd (instances)

* Add fwd to DeviceNormalization

* Rename external api header

* Rename folder, because we can also add bwd in this folder

* Add fwd in layernorm and groupnorm (profiler

* Fix compile error

---------

Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com>

[ROCm/composable_kernel commit: a3d9a2cd42]
This commit is contained in:
rocking
2023-11-09 08:34:51 +08:00
committed by GitHub
parent e857da2f34
commit b64f30e733
59 changed files with 1271 additions and 675 deletions

View File

@@ -139,7 +139,7 @@ add_subdirectory(grouped_convnd_fwd)
add_subdirectory(grouped_convnd_bwd_weight)
add_subdirectory(block_to_ctile_map)
add_subdirectory(softmax)
add_subdirectory(normalization)
add_subdirectory(normalization_fwd)
add_subdirectory(data_type)
add_subdirectory(elementwise_normalization)
add_subdirectory(batchnorm)

View File

@@ -1,21 +0,0 @@
add_custom_target(test_normalization)
add_gtest_executable(test_layernorm2d_fp32 test_layernorm2d_fp32.cpp)
if(result EQUAL 0)
target_link_libraries(test_layernorm2d_fp32 PRIVATE utility device_normalization_instance)
add_dependencies(test_normalization test_layernorm2d_fp32)
endif()
add_gtest_executable(test_groupnorm_fp32 test_groupnorm_fp32.cpp)
if(result EQUAL 0)
target_link_libraries(test_groupnorm_fp32 PRIVATE utility device_normalization_instance)
add_dependencies(test_normalization test_groupnorm_fp32)
endif()
add_gtest_executable(test_layernorm2d_fp16 test_layernorm2d_fp16.cpp)
if(result EQUAL 0)
target_link_libraries(test_layernorm2d_fp16 PRIVATE utility device_normalization_instance)
add_dependencies(test_normalization test_layernorm2d_fp16)
endif()
add_gtest_executable(test_groupnorm_fp16 test_groupnorm_fp16.cpp)
if(result EQUAL 0)
target_link_libraries(test_groupnorm_fp16 PRIVATE utility device_normalization_instance)
add_dependencies(test_normalization test_groupnorm_fp16)
endif()

View File

@@ -0,0 +1,30 @@
add_custom_target(test_normalization_fwd)
add_gtest_executable(test_layernorm2d_fwd_fp32 test_layernorm2d_fwd_fp32.cpp)
if(result EQUAL 0)
target_link_libraries(test_layernorm2d_fwd_fp32 PRIVATE utility device_normalization_fwd_instance)
add_dependencies(test_normalization_fwd test_layernorm2d_fwd_fp32)
endif()
add_gtest_executable(test_groupnorm_fwd_fp32 test_groupnorm_fwd_fp32.cpp)
if(result EQUAL 0)
target_link_libraries(test_groupnorm_fwd_fp32 PRIVATE utility device_normalization_fwd_instance)
add_dependencies(test_normalization_fwd test_groupnorm_fwd_fp32)
endif()
add_gtest_executable(test_layernorm2d_fwd_fp16 test_layernorm2d_fwd_fp16.cpp)
if(result EQUAL 0)
target_link_libraries(test_layernorm2d_fwd_fp16 PRIVATE utility device_normalization_fwd_instance)
add_dependencies(test_normalization_fwd test_layernorm2d_fwd_fp16)
endif()
add_gtest_executable(test_layernorm4d_fwd_fp16 test_layernorm4d_fwd_fp16.cpp)
if(result EQUAL 0)
target_link_libraries(test_layernorm4d_fwd_fp16 PRIVATE utility device_normalization_fwd_instance)
add_dependencies(test_normalization_fwd test_layernorm4d_fwd_fp16)
endif()
add_gtest_executable(test_groupnorm_fwd_fp16 test_groupnorm_fwd_fp16.cpp)
if(result EQUAL 0)
target_link_libraries(test_groupnorm_fwd_fp16 PRIVATE utility device_normalization_fwd_instance)
add_dependencies(test_normalization_fwd test_groupnorm_fwd_fp16)
endif()

View File

@@ -2,7 +2,7 @@
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_groupnorm_impl.hpp"
#include "profiler/profile_groupnorm_fwd_impl.hpp"
using F16 = ck::half_t;
using F32 = float;

View File

@@ -2,7 +2,7 @@
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_groupnorm_impl.hpp"
#include "profiler/profile_groupnorm_fwd_impl.hpp"
using F16 = ck::half_t;
using F32 = float;

View File

@@ -2,7 +2,7 @@
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_layernorm_impl.hpp"
#include "profiler/profile_layernorm_fwd_impl.hpp"
using F16 = ck::half_t;
using F32 = float;

View File

@@ -2,7 +2,7 @@
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_layernorm_impl.hpp"
#include "profiler/profile_layernorm_fwd_impl.hpp"
using F16 = ck::half_t;
using F32 = float;

View File

@@ -0,0 +1,48 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_layernorm_fwd_impl.hpp"
using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
template <typename Tuple>
class TestLayernorm4d : public ::testing::Test
{
protected:
using XDataType = std::tuple_element_t<0, Tuple>;
using GammaDataType = std::tuple_element_t<1, Tuple>;
using BetaDataType = std::tuple_element_t<2, Tuple>;
using ComputeDataType = std::tuple_element_t<3, Tuple>;
using YDataType = std::tuple_element_t<4, Tuple>;
using SaveMeanInvStdDataType = std::tuple_element_t<5, Tuple>;
void Run()
{
// [N, D], reduce D
std::vector<std::vector<ck::index_t>> lengths = {
{1, 1, 1, 1}, {7, 7, 7, 7}, {256, 16, 16, 8}};
for(auto length : lengths)
{
bool success = ck::profiler::profile_layernorm_impl<XDataType,
GammaDataType,
BetaDataType,
ComputeDataType,
YDataType,
SaveMeanInvStdDataType,
true,
4>(true, 2, false, false, length);
EXPECT_TRUE(success);
}
}
};
using KernelTypes = ::testing::Types<
// XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType>
std::tuple<F16, F16, F16, F32, F16, F32>>;
TYPED_TEST_SUITE(TestLayernorm4d, KernelTypes);
TYPED_TEST(TestLayernorm4d, Test_FP16) { this->Run(); }