mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-17 19:40:04 +00:00
Fused elementwise layernorm (#468)
* add fused addition lyernorm
* add fused addition lyernorm
* changed CMakelist
* removed annotates
* modified descriptor of C
* fixed bug in gridwise add layernorm
* format the files
* modified name from add&layernorm into elementwise&layernorm
* created fused elementwise layernorm branch
* change input into tuple type
* add sweep once to reduce load & read of C from global memory
* modified Argument api
* modified way to malloc c in global memory
* changed gamma and beta to m_k_desc
* fixed bug when sweep once and move CDataType when define device level struct
* add src dim for gamma and beta
* implement optimization for coalesced
* delete a annotation line
* fixed some bug to meet the requirements of ck
* add bandwidth computing in example, and fixed the time unit
* move device_elementwise_layernorm_impl.hpp into device/impl
* fixed bug in device_elementwise_layernorm_impl.hpp
* changed name from layernorm into normalization
* clang-format the changed files
* changed the names
* moved immidiate results into lds, it become faster in non-sweeponce cases
* changed naming of C into X to make the defination more clear
* changed naming in example
* add tests for elementwise normalization
* move example_elementwise_layernorm_blockwise into folder 44_elementwise_normalization
* move test_elementwise_layernorm_fp16 into new folder
* move elementwise_normalization_instances into a new folder
* add more tests in test_elementwise_layernorm_fp16.cpp
* added some corner cases in test
* fixed method to compute lds size for matrix X
* changed name of 44_elementwise_normalization into 45_elementwise_normalization
* modified some comments
* modified some other confused comments
* reduce redundant tests in test_elementwise_layernorm_fp16.cpp
[ROCm/composable_kernel commit: efbcc6eddc]
This commit is contained in:
@@ -52,3 +52,4 @@ add_subdirectory(block_to_ctile_map)
|
||||
add_subdirectory(softmax)
|
||||
add_subdirectory(normalization)
|
||||
add_subdirectory(data_type)
|
||||
add_subdirectory(elementwise_normalization)
|
||||
|
||||
7
test/elementwise_normalization/CMakeLists.txt
Normal file
7
test/elementwise_normalization/CMakeLists.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
add_custom_target(test_elementwise_normalization)
|
||||
|
||||
add_gtest_executable(test_elementwise_layernorm_fp16 test_elementwise_layernorm_fp16.cpp)
|
||||
|
||||
target_link_libraries(test_elementwise_layernorm_fp16 PRIVATE utility device_elementwise_normalization_instance)
|
||||
|
||||
add_dependencies(test_elementwise_normalization test_elementwise_layernorm_fp16)
|
||||
@@ -0,0 +1,47 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "profiler/include/profile_elementwise_layernorm_impl.hpp"
|
||||
|
||||
using F16 = ck::half_t;
|
||||
using F32 = float;
|
||||
using ck::index_t;
|
||||
|
||||
template <typename Tuple>
|
||||
class TestElementwiseLayernorm : public ::testing::Test
|
||||
{
|
||||
protected:
|
||||
using ADataType = std::tuple_element_t<0, Tuple>;
|
||||
using BDataType = std::tuple_element_t<1, Tuple>;
|
||||
using GammaDataType = std::tuple_element_t<2, Tuple>;
|
||||
using BetaDataType = std::tuple_element_t<3, Tuple>;
|
||||
using AccDataType = std::tuple_element_t<4, Tuple>;
|
||||
using YDataType = std::tuple_element_t<5, Tuple>;
|
||||
|
||||
void Run()
|
||||
{
|
||||
// M, N
|
||||
std::vector<std::vector<ck::index_t>> lengths = {
|
||||
{1, 1}, {25, 16}, {39, 777}, {100, 200}, {1024, 1024}, {48 * 256, 2048}};
|
||||
|
||||
for(auto length : lengths)
|
||||
{
|
||||
bool success = ck::profiler::profile_elementwise_layernorm_impl<ADataType,
|
||||
BDataType,
|
||||
GammaDataType,
|
||||
BetaDataType,
|
||||
AccDataType,
|
||||
YDataType>(
|
||||
true, 2, false, false, length);
|
||||
EXPECT_TRUE(success);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
using KernelTypes = ::testing::Types<
|
||||
// ADataType, BDataType, GammaDataType, BetaDataType, AccDataType, YDataType>
|
||||
std::tuple<F16, F16, F16, F16, F32, F16>>;
|
||||
|
||||
TYPED_TEST_SUITE(TestElementwiseLayernorm, KernelTypes);
|
||||
TYPED_TEST(TestElementwiseLayernorm, Test_FP16) { this->Run(); }
|
||||
@@ -3,9 +3,9 @@ add_custom_target(test_layernorm)
|
||||
add_gtest_executable(test_layernorm2d_fp32 test_layernorm2d_fp32.cpp)
|
||||
add_gtest_executable(test_layernorm2d_fp16 test_layernorm2d_fp16.cpp)
|
||||
add_gtest_executable(test_groupnorm_fp16 test_groupnorm_fp16.cpp)
|
||||
add_gtest_executable(test_groupnorm_fp32 test_groupnorm_fp32.cpp)
|
||||
add_gtest_executable(test_groupnorm_fp32 test_groupnorm_fp32.cpp)
|
||||
|
||||
target_link_libraries(test_layernorm2d_fp32 PRIVATE utility)
|
||||
target_link_libraries(test_layernorm2d_fp32 PRIVATE utility)
|
||||
target_link_libraries(test_layernorm2d_fp16 PRIVATE utility)
|
||||
target_link_libraries(test_groupnorm_fp16 PRIVATE utility device_normalization_instance)
|
||||
target_link_libraries(test_groupnorm_fp32 PRIVATE utility device_normalization_instance)
|
||||
@@ -14,4 +14,3 @@ add_dependencies(test_layernorm test_layernorm2d_fp32)
|
||||
add_dependencies(test_layernorm test_layernorm2d_fp16)
|
||||
add_dependencies(test_layernorm test_groupnorm_fp16)
|
||||
add_dependencies(test_layernorm test_groupnorm_fp32)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user