mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-16 02:54:21 +00:00
Add support for mixed-precision f16bf16_int8 gemm (#1127)
[ROCm/composable_kernel commit: ba86eadce5]
This commit is contained in:
@@ -122,6 +122,7 @@ add_subdirectory(space_filling_curve)
|
||||
add_subdirectory(conv_util)
|
||||
add_subdirectory(reference_conv_fwd)
|
||||
add_subdirectory(gemm)
|
||||
add_subdirectory(gemm_add)
|
||||
add_subdirectory(gemm_layernorm)
|
||||
add_subdirectory(gemm_split_k)
|
||||
add_subdirectory(gemm_reduce)
|
||||
|
||||
11
test/gemm_add/CMakeLists.txt
Normal file
11
test/gemm_add/CMakeLists.txt
Normal file
@@ -0,0 +1,11 @@
|
||||
add_gtest_executable(test_gemm_add test_gemm_add.hpp)
|
||||
target_link_libraries(test_gemm_add PRIVATE utility device_gemm_add_instance)
|
||||
|
||||
add_gtest_executable(test_gemm_add_relu test_gemm_add_relu.cpp)
|
||||
target_link_libraries(test_gemm_add_relu PRIVATE utility device_gemm_add_instance device_gemm_add_relu_instance)
|
||||
|
||||
add_gtest_executable(test_gemm_add_silu test_gemm_add_silu.cpp)
|
||||
target_link_libraries(test_gemm_add_silu PRIVATE utility device_gemm_add_instance device_gemm_add_silu_instance)
|
||||
|
||||
add_gtest_executable(test_gemm_add_fastgelu test_gemm_add_fastgelu.cpp)
|
||||
target_link_libraries(test_gemm_add_fastgelu PRIVATE utility device_gemm_add_instance device_gemm_add_fastgelu_instance)
|
||||
72
test/gemm_add/test_gemm_add.hpp
Normal file
72
test/gemm_add/test_gemm_add.hpp
Normal file
@@ -0,0 +1,72 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "ck/ck.hpp"
|
||||
#include "profiler/profile_gemm_add_impl.hpp"
|
||||
|
||||
using Row = ck::tensor_layout::gemm::RowMajor;
|
||||
using Col = ck::tensor_layout::gemm::ColumnMajor;
|
||||
|
||||
using I8 = int8_t;
|
||||
using BF16 = ck::bhalf_t;
|
||||
using F16 = ck::half_t;
|
||||
using F32 = float;
|
||||
|
||||
template <typename Tuple>
|
||||
class TestGemmAdd : public ::testing::Test
|
||||
{
|
||||
protected:
|
||||
using ADataType = std::tuple_element_t<0, Tuple>;
|
||||
using BDataType = std::tuple_element_t<1, Tuple>;
|
||||
using AccDataType = std::tuple_element_t<2, Tuple>;
|
||||
using D0DataType = std::tuple_element_t<3, Tuple>;
|
||||
using EDataType = std::tuple_element_t<4, Tuple>;
|
||||
using ALayout = std::tuple_element_t<5, Tuple>;
|
||||
using BLayout = std::tuple_element_t<6, Tuple>;
|
||||
using D0Layout = std::tuple_element_t<7, Tuple>;
|
||||
using ELayout = std::tuple_element_t<8, Tuple>;
|
||||
|
||||
constexpr static auto ProfileGemmAddImpl = ck::profiler::profile_gemm_add_impl<ADataType,
|
||||
BDataType,
|
||||
AccDataType,
|
||||
D0DataType,
|
||||
EDataType,
|
||||
ALayout,
|
||||
BLayout,
|
||||
D0Layout,
|
||||
ELayout>;
|
||||
|
||||
virtual decltype(ProfileGemmAddImpl) GetImpl() { return ProfileGemmAddImpl; }
|
||||
|
||||
void Run()
|
||||
{
|
||||
std::vector<std::vector<ck::index_t>> lengths = {
|
||||
{16, 32, 64}, {2048, 4096, 8192}, {2048, 1024, 16}};
|
||||
|
||||
bool all_success = true;
|
||||
|
||||
for(auto length : lengths)
|
||||
{
|
||||
int M = length[0];
|
||||
int N = length[1];
|
||||
int K = length[2];
|
||||
int StrideA = ck::is_same_v<ALayout, Row> ? K : M;
|
||||
int StrideB = ck::is_same_v<BLayout, Row> ? N : K;
|
||||
int StrideD0 = ck::is_same_v<D0Layout, Row> ? N : M;
|
||||
int StrideE = ck::is_same_v<ELayout, Row> ? N : M;
|
||||
|
||||
all_success =
|
||||
all_success &
|
||||
GetImpl()(true, 1, false, false, M, N, K, StrideA, StrideB, StrideD0, StrideE);
|
||||
}
|
||||
|
||||
EXPECT_TRUE(all_success);
|
||||
}
|
||||
};
|
||||
|
||||
using KernelTypes = ::testing::Types<std::tuple<F16, I8, F32, F16, F16, Row, Row, Row, Row>,
|
||||
std::tuple<BF16, I8, F32, BF16, BF16, Row, Row, Row, Row>>;
|
||||
|
||||
TYPED_TEST_SUITE(TestGemmAdd, KernelTypes);
|
||||
TYPED_TEST(TestGemmAdd, Test_BF16FP16_INT8) { this->Run(); }
|
||||
41
test/gemm_add/test_gemm_add_fastgelu.cpp
Normal file
41
test/gemm_add/test_gemm_add_fastgelu.cpp
Normal file
@@ -0,0 +1,41 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "ck/ck.hpp"
|
||||
#include "profiler/profile_gemm_add_fastgelu_impl.hpp"
|
||||
#include "test_gemm_add.hpp"
|
||||
|
||||
template <typename Tuple>
|
||||
class TestGemmAddFastgelu : public TestGemmAdd<Tuple>
|
||||
{
|
||||
private:
|
||||
using ADataType = std::tuple_element_t<0, Tuple>;
|
||||
using BDataType = std::tuple_element_t<1, Tuple>;
|
||||
using AccDataType = std::tuple_element_t<2, Tuple>;
|
||||
using D0DataType = std::tuple_element_t<3, Tuple>;
|
||||
using EDataType = std::tuple_element_t<4, Tuple>;
|
||||
using ALayout = std::tuple_element_t<5, Tuple>;
|
||||
using BLayout = std::tuple_element_t<6, Tuple>;
|
||||
using D0Layout = std::tuple_element_t<7, Tuple>;
|
||||
using ELayout = std::tuple_element_t<8, Tuple>;
|
||||
|
||||
constexpr static auto ProfileGemmAddFastgeluImpl =
|
||||
ck::profiler::profile_gemm_add_fastgelu_impl<ADataType,
|
||||
BDataType,
|
||||
AccDataType,
|
||||
D0DataType,
|
||||
EDataType,
|
||||
ALayout,
|
||||
BLayout,
|
||||
D0Layout,
|
||||
ELayout>;
|
||||
|
||||
decltype(ProfileGemmAddFastgeluImpl) GetImpl() override { return ProfileGemmAddFastgeluImpl; }
|
||||
};
|
||||
|
||||
using KernelTypes = ::testing::Types<std::tuple<F16, I8, F32, F16, F16, Row, Row, Row, Row>,
|
||||
std::tuple<BF16, I8, F32, BF16, BF16, Row, Row, Row, Row>>;
|
||||
|
||||
TYPED_TEST_SUITE(TestGemmAddFastgelu, KernelTypes);
|
||||
TYPED_TEST(TestGemmAddFastgelu, Test_BF16FP16) { this->Run(); }
|
||||
41
test/gemm_add/test_gemm_add_relu.cpp
Normal file
41
test/gemm_add/test_gemm_add_relu.cpp
Normal file
@@ -0,0 +1,41 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "ck/ck.hpp"
|
||||
#include "profiler/profile_gemm_add_relu_impl.hpp"
|
||||
#include "test_gemm_add.hpp"
|
||||
|
||||
template <typename Tuple>
|
||||
class TestGemmAddRelu : public TestGemmAdd<Tuple>
|
||||
{
|
||||
private:
|
||||
using ADataType = std::tuple_element_t<0, Tuple>;
|
||||
using BDataType = std::tuple_element_t<1, Tuple>;
|
||||
using AccDataType = std::tuple_element_t<2, Tuple>;
|
||||
using D0DataType = std::tuple_element_t<3, Tuple>;
|
||||
using EDataType = std::tuple_element_t<4, Tuple>;
|
||||
using ALayout = std::tuple_element_t<5, Tuple>;
|
||||
using BLayout = std::tuple_element_t<6, Tuple>;
|
||||
using D0Layout = std::tuple_element_t<7, Tuple>;
|
||||
using ELayout = std::tuple_element_t<8, Tuple>;
|
||||
|
||||
constexpr static auto ProfileGemmAddReluImpl =
|
||||
ck::profiler::profile_gemm_add_relu_impl<ADataType,
|
||||
BDataType,
|
||||
AccDataType,
|
||||
D0DataType,
|
||||
EDataType,
|
||||
ALayout,
|
||||
BLayout,
|
||||
D0Layout,
|
||||
ELayout>;
|
||||
|
||||
decltype(ProfileGemmAddReluImpl) GetImpl() override { return ProfileGemmAddReluImpl; }
|
||||
};
|
||||
|
||||
using KernelTypes = ::testing::Types<std::tuple<F16, I8, F32, F16, F16, Row, Row, Row, Row>,
|
||||
std::tuple<BF16, I8, F32, BF16, BF16, Row, Row, Row, Row>>;
|
||||
|
||||
TYPED_TEST_SUITE(TestGemmAddRelu, KernelTypes);
|
||||
TYPED_TEST(TestGemmAddRelu, Test_BF16FP16_INT8) { this->Run(); }
|
||||
41
test/gemm_add/test_gemm_add_silu.cpp
Normal file
41
test/gemm_add/test_gemm_add_silu.cpp
Normal file
@@ -0,0 +1,41 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "ck/ck.hpp"
|
||||
#include "profiler/profile_gemm_add_silu_impl.hpp"
|
||||
#include "test_gemm_add.hpp"
|
||||
|
||||
template <typename Tuple>
|
||||
class TestGemmAddSilu : public TestGemmAdd<Tuple>
|
||||
{
|
||||
private:
|
||||
using ADataType = std::tuple_element_t<0, Tuple>;
|
||||
using BDataType = std::tuple_element_t<1, Tuple>;
|
||||
using AccDataType = std::tuple_element_t<2, Tuple>;
|
||||
using D0DataType = std::tuple_element_t<3, Tuple>;
|
||||
using EDataType = std::tuple_element_t<4, Tuple>;
|
||||
using ALayout = std::tuple_element_t<5, Tuple>;
|
||||
using BLayout = std::tuple_element_t<6, Tuple>;
|
||||
using D0Layout = std::tuple_element_t<7, Tuple>;
|
||||
using ELayout = std::tuple_element_t<8, Tuple>;
|
||||
|
||||
constexpr static auto ProfileGemmAddSiluImpl =
|
||||
ck::profiler::profile_gemm_add_silu_impl<ADataType,
|
||||
BDataType,
|
||||
AccDataType,
|
||||
D0DataType,
|
||||
EDataType,
|
||||
ALayout,
|
||||
BLayout,
|
||||
D0Layout,
|
||||
ELayout>;
|
||||
|
||||
decltype(ProfileGemmAddSiluImpl) GetImpl() override { return ProfileGemmAddSiluImpl; }
|
||||
};
|
||||
|
||||
using KernelTypes = ::testing::Types<std::tuple<F16, I8, F32, F16, F16, Row, Row, Row, Row>,
|
||||
std::tuple<BF16, I8, F32, BF16, BF16, Row, Row, Row, Row>>;
|
||||
|
||||
TYPED_TEST_SUITE(TestGemmAddSilu, KernelTypes);
|
||||
TYPED_TEST(TestGemmAddSilu, Test_BF16FP16_INT8) { this->Run(); }
|
||||
Reference in New Issue
Block a user