layernorm and groupnorm backward data (#1083)

* rename folder

* Add type string

* Remove typo

* Add deviceOp to backward x

* Add comment to describe the behavior of backward normalization

* Add kernel function, prepare to implement

* implement generic kernel

* Check vector size

* Add sweep once pipeline for small reduce size

* Fix bug of KRaw_ error

* Fix bug of dx stride

* sanity check for mean and rstd

* backward x for groupnorm

* Add bwd x instance

* add layernorm 2d bwd gamma beta instances

* Change save mean var type from f32 to f16 in f16 mode

* Change the example to f16

* Add groupnorm bwd gamma beta instance

* Add groupnorm bwd x instance

* Fix naming

* Add layernorm bwd x ckprofiler

* Add groupnorm bwd x profiler

* clang format

* Rename bwd x to bwd data

* Fix bug of verification in profiler

* Add test of layernorm and groupnorm bwd data

* Add missing cmake

* Add layernorm2d bwd data

* rename fwd example

* Add groupnorm client example

* Fix typo. replace Invarient with Invariant

* Add checking before running the best instance

[ROCm/composable_kernel commit: a69aa2a11a]
This commit is contained in:
rocking
2023-12-19 04:23:11 +08:00
committed by GitHub
parent 9f2d90a8b6
commit 53eab49062
65 changed files with 3050 additions and 110 deletions

View File

@@ -0,0 +1,13 @@
add_custom_target(test_normalization_bwd_data)
add_gtest_executable(test_layernorm2d_bwd_data_fp32 test_layernorm2d_bwd_data_fp32.cpp)
if(result EQUAL 0)
target_link_libraries(test_layernorm2d_bwd_data_fp32 PRIVATE utility device_normalization_bwd_data_instance)
add_dependencies(test_normalization_bwd_data test_layernorm2d_bwd_data_fp32)
endif()
add_gtest_executable(test_groupnorm_bwd_data_fp32 test_groupnorm_bwd_data_fp32.cpp)
if(result EQUAL 0)
target_link_libraries(test_groupnorm_bwd_data_fp32 PRIVATE utility device_normalization_bwd_data_instance)
add_dependencies(test_normalization_bwd_data test_groupnorm_bwd_data_fp32)
endif()

View File

@@ -0,0 +1,51 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_groupnorm_bwd_data_impl.hpp"
using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
template <typename Tuple>
class TestgroupnormBwdData : public ::testing::Test
{
protected:
using DYDataType = std::tuple_element_t<0, Tuple>;
using XDataType = std::tuple_element_t<1, Tuple>;
using GammaDataType = std::tuple_element_t<2, Tuple>;
using MeanInvStdDataType = std::tuple_element_t<3, Tuple>;
using ComputeDataType = std::tuple_element_t<4, Tuple>;
using DXDataType = std::tuple_element_t<5, Tuple>;
void Run()
{
// Bwd data: [N, H, W, G, C], reduce H, W, C
std::vector<std::vector<ck::index_t>> lengths = {{1, 1, 1, 1, 1},
{1, 2, 3, 4, 5},
{256, 9, 9, 9, 9},
{1, 64, 64, 32, 10},
{1, 32, 32, 32, 20},
{1, 16, 16, 32, 40}};
for(auto length : lengths)
{
bool success = ck::profiler::profile_groupnorm_bwd_data_impl<DYDataType,
XDataType,
GammaDataType,
MeanInvStdDataType,
ComputeDataType,
DXDataType>(
true, 2, false, false, length);
EXPECT_TRUE(success);
}
}
};
using KernelTypes = ::testing::Types<
// DYDataType XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType>
std::tuple<F32, F32, F32, F32, F32, F32>>;
TYPED_TEST_SUITE(TestgroupnormBwdData, KernelTypes);
TYPED_TEST(TestgroupnormBwdData, Test_FP32) { this->Run(); }

View File

@@ -0,0 +1,48 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_layernorm_bwd_data_impl.hpp"
using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
template <typename Tuple>
class TestLayernorm2dBwdData : public ::testing::Test
{
protected:
using DYDataType = std::tuple_element_t<0, Tuple>;
using XDataType = std::tuple_element_t<1, Tuple>;
using GammaDataType = std::tuple_element_t<2, Tuple>;
using MeanInvStdDataType = std::tuple_element_t<3, Tuple>;
using ComputeDataType = std::tuple_element_t<4, Tuple>;
using DXDataType = std::tuple_element_t<5, Tuple>;
void Run()
{
// Bwd data: [N, D], reduce D
std::vector<std::vector<ck::index_t>> lengths = {
{4, 256}, {8, 511}, {9, 1032}, {4, 2048}, {1, 8192}, {4000, 2000}};
for(auto length : lengths)
{
bool success =
ck::profiler::profile_layernorm_bwd_data_impl<DYDataType,
XDataType,
GammaDataType,
MeanInvStdDataType,
ComputeDataType,
DXDataType,
2>(true, 2, false, false, length);
EXPECT_TRUE(success);
}
}
};
using KernelTypes = ::testing::Types<
// DYDataType XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType>
std::tuple<F32, F32, F32, F32, F32, F32>>;
TYPED_TEST_SUITE(TestLayernorm2dBwdData, KernelTypes);
TYPED_TEST(TestLayernorm2dBwdData, Test_FP32) { this->Run(); }