Layernorm4d (#1022)

* Rename folder

* Add layernorm 4d fwd example

* Rename original layernorm example

* Add layernorm 4d f16  test

* Add layernorm4d_fwd client example

* Support layernorm4D in ckProfiler

* Rename groupnorm to groupnorm fwd in example

* Rename layernorm and group fwd in test

* Rename normalization to normalization_fwd (instances)

* Add fwd to DeviceNormalization

* Rename external api header

* Rename folder, because we can also add bwd in this folder

* Add fwd in layernorm and groupnorm (profiler

* Fix compile error

---------

Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com>

[ROCm/composable_kernel commit: a3d9a2cd42]
This commit is contained in:
rocking
2023-11-09 08:34:51 +08:00
committed by GitHub
parent e857da2f34
commit b64f30e733
59 changed files with 1271 additions and 675 deletions

View File

@@ -0,0 +1,30 @@
add_custom_target(test_normalization_fwd)
add_gtest_executable(test_layernorm2d_fwd_fp32 test_layernorm2d_fwd_fp32.cpp)
if(result EQUAL 0)
target_link_libraries(test_layernorm2d_fwd_fp32 PRIVATE utility device_normalization_fwd_instance)
add_dependencies(test_normalization_fwd test_layernorm2d_fwd_fp32)
endif()
add_gtest_executable(test_groupnorm_fwd_fp32 test_groupnorm_fwd_fp32.cpp)
if(result EQUAL 0)
target_link_libraries(test_groupnorm_fwd_fp32 PRIVATE utility device_normalization_fwd_instance)
add_dependencies(test_normalization_fwd test_groupnorm_fwd_fp32)
endif()
add_gtest_executable(test_layernorm2d_fwd_fp16 test_layernorm2d_fwd_fp16.cpp)
if(result EQUAL 0)
target_link_libraries(test_layernorm2d_fwd_fp16 PRIVATE utility device_normalization_fwd_instance)
add_dependencies(test_normalization_fwd test_layernorm2d_fwd_fp16)
endif()
add_gtest_executable(test_layernorm4d_fwd_fp16 test_layernorm4d_fwd_fp16.cpp)
if(result EQUAL 0)
target_link_libraries(test_layernorm4d_fwd_fp16 PRIVATE utility device_normalization_fwd_instance)
add_dependencies(test_normalization_fwd test_layernorm4d_fwd_fp16)
endif()
add_gtest_executable(test_groupnorm_fwd_fp16 test_groupnorm_fwd_fp16.cpp)
if(result EQUAL 0)
target_link_libraries(test_groupnorm_fwd_fp16 PRIVATE utility device_normalization_fwd_instance)
add_dependencies(test_normalization_fwd test_groupnorm_fwd_fp16)
endif()

View File

@@ -0,0 +1,54 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_groupnorm_fwd_impl.hpp"
using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
template <typename Tuple>
class TestGroupnorm : public ::testing::Test
{
protected:
using XDataType = std::tuple_element_t<0, Tuple>;
using GammaDataType = std::tuple_element_t<1, Tuple>;
using BetaDataType = std::tuple_element_t<2, Tuple>;
using ComputeDataType = std::tuple_element_t<3, Tuple>;
using YDataType = std::tuple_element_t<4, Tuple>;
using SaveMeanInvStdDataType = std::tuple_element_t<5, Tuple>;
void Run()
{
// [N, H, W, G, C], reduce H, W, C
std::vector<std::vector<ck::index_t>> lengths = {{1, 1, 1, 1, 1},
{1, 2, 3, 4, 5},
{256, 9, 9, 9, 9},
{1, 64, 64, 32, 10},
{1, 32, 32, 32, 20},
{2, 32, 32, 32, 30},
{2, 32, 32, 32, 40},
{1, 16, 16, 32, 40}};
for(auto length : lengths)
{
bool success =
ck::profiler::profile_groupnorm_impl<XDataType,
GammaDataType,
BetaDataType,
ComputeDataType,
YDataType,
SaveMeanInvStdDataType,
true>(true, 2, false, false, length);
EXPECT_TRUE(success);
}
}
};
using KernelTypes = ::testing::Types<
// XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType>
std::tuple<F16, F16, F16, F32, F16, F32>>;
TYPED_TEST_SUITE(TestGroupnorm, KernelTypes);
TYPED_TEST(TestGroupnorm, Test_FP16) { this->Run(); }

View File

@@ -0,0 +1,52 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_groupnorm_fwd_impl.hpp"
using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
template <typename Tuple>
class TestGroupnorm : public ::testing::Test
{
protected:
using XDataType = std::tuple_element_t<0, Tuple>;
using GammaDataType = std::tuple_element_t<1, Tuple>;
using BetaDataType = std::tuple_element_t<2, Tuple>;
using ComputeDataType = std::tuple_element_t<3, Tuple>;
using YDataType = std::tuple_element_t<4, Tuple>;
using SaveMeanInvStdDataType = std::tuple_element_t<5, Tuple>;
void Run()
{
// [N, H, W, G, C], reduce H, W, C
std::vector<std::vector<ck::index_t>> lengths = {{1, 1, 1, 1, 1},
{1, 2, 3, 4, 5},
{256, 9, 9, 9, 9},
{1, 64, 64, 32, 10},
{1, 32, 32, 32, 20},
{1, 16, 16, 32, 40}};
for(auto length : lengths)
{
bool success =
ck::profiler::profile_groupnorm_impl<XDataType,
GammaDataType,
BetaDataType,
ComputeDataType,
YDataType,
SaveMeanInvStdDataType,
true>(true, 2, false, false, length);
EXPECT_TRUE(success);
}
}
};
using KernelTypes = ::testing::Types<
// XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType>
std::tuple<F32, F32, F32, F32, F32, F32>>;
TYPED_TEST_SUITE(TestGroupnorm, KernelTypes);
TYPED_TEST(TestGroupnorm, Test_FP32) { this->Run(); }

View File

@@ -0,0 +1,48 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_layernorm_fwd_impl.hpp"
using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
template <typename Tuple>
class TestLayernorm2d : public ::testing::Test
{
protected:
using XDataType = std::tuple_element_t<0, Tuple>;
using GammaDataType = std::tuple_element_t<1, Tuple>;
using BetaDataType = std::tuple_element_t<2, Tuple>;
using ComputeDataType = std::tuple_element_t<3, Tuple>;
using YDataType = std::tuple_element_t<4, Tuple>;
using SaveMeanInvStdDataType = std::tuple_element_t<5, Tuple>;
void Run()
{
// [N, D], reduce D
std::vector<std::vector<ck::index_t>> lengths = {
{4, 256}, {8, 511}, {9, 1032}, {4, 2048}, {1, 8192}, {4000, 2000}};
for(auto length : lengths)
{
bool success = ck::profiler::profile_layernorm_impl<XDataType,
GammaDataType,
BetaDataType,
ComputeDataType,
YDataType,
SaveMeanInvStdDataType,
true,
2>(true, 2, false, false, length);
EXPECT_TRUE(success);
}
}
};
using KernelTypes = ::testing::Types<
// XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType>
std::tuple<F16, F16, F16, F32, F16, F32>>;
TYPED_TEST_SUITE(TestLayernorm2d, KernelTypes);
TYPED_TEST(TestLayernorm2d, Test_FP16) { this->Run(); }

View File

@@ -0,0 +1,48 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_layernorm_fwd_impl.hpp"
using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
template <typename Tuple>
class TestLayernorm2d : public ::testing::Test
{
protected:
using XDataType = std::tuple_element_t<0, Tuple>;
using GammaDataType = std::tuple_element_t<1, Tuple>;
using BetaDataType = std::tuple_element_t<2, Tuple>;
using ComputeDataType = std::tuple_element_t<3, Tuple>;
using YDataType = std::tuple_element_t<4, Tuple>;
using SaveMeanInvStdDataType = std::tuple_element_t<5, Tuple>;
void Run()
{
// [N, D], reduce D
std::vector<std::vector<ck::index_t>> lengths = {
{4, 256}, {8, 511}, {9, 1032}, {4, 2048}, {1, 8192}, {4000, 2000}};
for(auto length : lengths)
{
bool success = ck::profiler::profile_layernorm_impl<XDataType,
GammaDataType,
BetaDataType,
ComputeDataType,
YDataType,
SaveMeanInvStdDataType,
true,
2>(true, 2, false, false, length);
EXPECT_TRUE(success);
}
}
};
using KernelTypes = ::testing::Types<
// XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType>
std::tuple<F32, F32, F32, F32, F32, F32>>;
TYPED_TEST_SUITE(TestLayernorm2d, KernelTypes);
TYPED_TEST(TestLayernorm2d, Test_FP32) { this->Run(); }

View File

@@ -0,0 +1,48 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_layernorm_fwd_impl.hpp"
using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
template <typename Tuple>
class TestLayernorm4d : public ::testing::Test
{
protected:
using XDataType = std::tuple_element_t<0, Tuple>;
using GammaDataType = std::tuple_element_t<1, Tuple>;
using BetaDataType = std::tuple_element_t<2, Tuple>;
using ComputeDataType = std::tuple_element_t<3, Tuple>;
using YDataType = std::tuple_element_t<4, Tuple>;
using SaveMeanInvStdDataType = std::tuple_element_t<5, Tuple>;
void Run()
{
// [N, D], reduce D
std::vector<std::vector<ck::index_t>> lengths = {
{1, 1, 1, 1}, {7, 7, 7, 7}, {256, 16, 16, 8}};
for(auto length : lengths)
{
bool success = ck::profiler::profile_layernorm_impl<XDataType,
GammaDataType,
BetaDataType,
ComputeDataType,
YDataType,
SaveMeanInvStdDataType,
true,
4>(true, 2, false, false, length);
EXPECT_TRUE(success);
}
}
};
using KernelTypes = ::testing::Types<
// XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType>
std::tuple<F16, F16, F16, F32, F16, F32>>;
TYPED_TEST_SUITE(TestLayernorm4d, KernelTypes);
TYPED_TEST(TestLayernorm4d, Test_FP16) { this->Run(); }