mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
Fused elementwise normalization (#492)
* add fused addition lyernorm * add fused addition lyernorm * changed CMakelist * removed annotates * modified descriptor of C * fixed bug in gridwise add layernorm * format the files * modified name from add&layernorm into elementwise&layernorm * created fused elementwise layernorm branch * change input into tuple type * add sweep once to reduce load & read of C from global memory * modified Argument api * modified way to malloc c in global memory * changed gamma and beta to m_k_desc * fixed bug when sweep once and move CDataType when define device level struct * add src dim for gamma and beta * implement optimization for coalesced * delete a annotation line * fixed some bug to meet the requirements of ck * add bandwidth computing in example, and fixed the time unit * move device_elementwise_layernorm_impl.hpp into device/impl * fixed bug in device_elementwise_layernorm_impl.hpp * changed name from layernorm into normalization * clang-format the changed files * changed the names * moved immidiate results into lds, it become faster in non-sweeponce cases * changed naming of C into X to make the defination more clear * changed naming in example * add tests for elementwise normalization * move example_elementwise_layernorm_blockwise into folder 44_elementwise_normalization * move test_elementwise_layernorm_fp16 into new folder * move elementwise_normalization_instances into a new folder * add more tests in test_elementwise_layernorm_fp16.cpp * added some corner cases in test * fixed method to compute lds size for matrix X * changed name of 44_elementwise_normalization into 45_elementwise_normalization * modified some comments * modified some other confused comments * reduce redundant tests in test_elementwise_layernorm_fp16.cpp
This commit is contained in:
7
test/elementwise_normalization/CMakeLists.txt
Normal file
7
test/elementwise_normalization/CMakeLists.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
add_custom_target(test_elementwise_normalization)
|
||||
|
||||
add_gtest_executable(test_elementwise_layernorm_fp16 test_elementwise_layernorm_fp16.cpp)
|
||||
|
||||
target_link_libraries(test_elementwise_layernorm_fp16 PRIVATE utility device_elementwise_normalization_instance)
|
||||
|
||||
add_dependencies(test_elementwise_normalization test_elementwise_layernorm_fp16)
|
||||
@@ -0,0 +1,47 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "profiler/include/profile_elementwise_layernorm_impl.hpp"
|
||||
|
||||
using F16 = ck::half_t;
|
||||
using F32 = float;
|
||||
using ck::index_t;
|
||||
|
||||
template <typename Tuple>
|
||||
class TestElementwiseLayernorm : public ::testing::Test
|
||||
{
|
||||
protected:
|
||||
using ADataType = std::tuple_element_t<0, Tuple>;
|
||||
using BDataType = std::tuple_element_t<1, Tuple>;
|
||||
using GammaDataType = std::tuple_element_t<2, Tuple>;
|
||||
using BetaDataType = std::tuple_element_t<3, Tuple>;
|
||||
using AccDataType = std::tuple_element_t<4, Tuple>;
|
||||
using YDataType = std::tuple_element_t<5, Tuple>;
|
||||
|
||||
void Run()
|
||||
{
|
||||
// M, N
|
||||
std::vector<std::vector<ck::index_t>> lengths = {
|
||||
{1, 1}, {25, 16}, {39, 777}, {100, 200}, {1024, 1024}, {48 * 256, 2048}};
|
||||
|
||||
for(auto length : lengths)
|
||||
{
|
||||
bool success = ck::profiler::profile_elementwise_layernorm_impl<ADataType,
|
||||
BDataType,
|
||||
GammaDataType,
|
||||
BetaDataType,
|
||||
AccDataType,
|
||||
YDataType>(
|
||||
true, 2, false, false, length);
|
||||
EXPECT_TRUE(success);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
using KernelTypes = ::testing::Types<
|
||||
// ADataType, BDataType, GammaDataType, BetaDataType, AccDataType, YDataType>
|
||||
std::tuple<F16, F16, F16, F16, F32, F16>>;
|
||||
|
||||
TYPED_TEST_SUITE(TestElementwiseLayernorm, KernelTypes);
|
||||
TYPED_TEST(TestElementwiseLayernorm, Test_FP16) { this->Run(); }
|
||||
Reference in New Issue
Block a user