Files
composable_kernel/test/gemm_mx/test_gemm_mx.cpp
Andriy Roshchenko 79b0bfeb41 MX GEMM - Add FP8 GEMM Tests for Different Layouts (#2152)
* Add gemm_mx_fp8_bf8 example with row-major B

* Add more overloads of MX MFMA instructions

* Add MK_KN (RRR) tests

* Add KM_NK (CCR) tests

* Add more problem sizes to Large tests

* Add test_gemm_mx to the list of regression tests
2025-05-01 11:55:48 -06:00

278 lines
6.3 KiB
C++

// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple>
#include "gtest/gtest.h"
#include "test_gemm_mx_util.hpp"
using E8M0 = ck::e8m0_bexp_t;
using F8 = ck::f8_t;
using BF8 = ck::bf8_t;
using F6 = ck::f6_t;
using BF6 = ck::bf6_t;
using F4 = ck::f4_t;
using F16 = ck::half_t;
using BF16 = ck::bhalf_t;
using F32 = float;
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
namespace {
template <typename X, typename Y>
struct tuple_concat;
template <typename... Xs, typename... Ys>
struct tuple_concat<std::tuple<Xs...>, std::tuple<Ys...>>
{
using type = std::tuple<Xs..., Ys...>;
};
} // namespace
template <typename Tuple>
class TestGemmMX_MK_NK
: public ck::test::TestGemmMX<typename tuple_concat<std::tuple<Row, Col>, Tuple>::type>
{
};
template <typename Tuple>
class TestGemmMX_MK_KN
: public ck::test::TestGemmMX<typename tuple_concat<std::tuple<Row, Row>, Tuple>::type>
{
};
template <typename Tuple>
class TestGemmMX_KM_NK
: public ck::test::TestGemmMX<typename tuple_concat<std::tuple<Col, Col>, Tuple>::type>
{
};
// clang-format off
using KernelTypes_F8_MK_NK = ::testing::Types<
#if defined(CK_ENABLE_FP8)
// ADataType, BDataType, CDataType, ScaleBlockSize
std::tuple< F8, F8, F16, ck::Number<32> >,
std::tuple< F8, F8, BF16, ck::Number<32> >
#endif
>;
using KernelTypes_BF8_F8_MK_KN = ::testing::Types<
#if defined(CK_ENABLE_FP8)
// ADataType, BDataType, CDataType, ScaleBlockSize
std::tuple< BF8, F8, F16, ck::Number<32> >
#endif
>;
using KernelTypes_F8_KM_NK = ::testing::Types<
#if defined(CK_ENABLE_FP8)
// ADataType, BDataType, CDataType, ScaleBlockSize
std::tuple< F8, F8, BF16, ck::Number<32> >
#endif
>;
// clang-format on
TYPED_TEST_SUITE(TestGemmMX_MK_NK, KernelTypes_F8_MK_NK);
TYPED_TEST_SUITE(TestGemmMX_MK_KN, KernelTypes_BF8_F8_MK_KN);
TYPED_TEST_SUITE(TestGemmMX_KM_NK, KernelTypes_F8_KM_NK);
/// A: RowMajor
/// B: ColMajor
/// C: RowMajor
TYPED_TEST(TestGemmMX_MK_NK, SmallM)
{
std::vector<int> Ms{1, 2, 3, 4, 5, 6};
constexpr int N = 256;
constexpr int K = 512;
constexpr int StrideA = K;
constexpr int StrideB = K;
constexpr int StrideC = N;
for(int M : Ms)
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
TYPED_TEST(TestGemmMX_MK_NK, MidLargeM)
{
std::vector<int> Ms{127, 255, 312, 799, 1573};
constexpr int N = 256;
constexpr int K = 512;
constexpr int StrideA = K;
constexpr int StrideB = K;
constexpr int StrideC = N;
for(int M : Ms)
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
TYPED_TEST(TestGemmMX_MK_NK, Regular)
{
std::vector<int> Ms{3840};
constexpr int N = 512;
constexpr int K = 1024;
constexpr int StrideA = K;
constexpr int StrideB = K;
constexpr int StrideC = N;
for(int M : Ms)
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
TYPED_TEST(TestGemmMX_MK_NK, Large)
{
std::vector<std::pair<int, int>> test_sizes{{5120, 5120}, {3840, 5120}, {4096, 4096}};
constexpr int K = 4096;
constexpr int StrideA = K;
constexpr int StrideB = K;
for(auto test_size : test_sizes)
{
auto M = test_size.first;
auto N = test_size.second;
const auto StrideC = N;
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
}
/// A: RowMajor
/// B: RowMajor
/// C: RowMajor
TYPED_TEST(TestGemmMX_MK_KN, SmallM)
{
std::vector<int> Ms{1, 2, 3, 4, 5, 6};
constexpr int N = 256;
constexpr int K = 512;
constexpr int StrideA = K;
constexpr int StrideB = N;
constexpr int StrideC = N;
for(int M : Ms)
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
TYPED_TEST(TestGemmMX_MK_KN, MidLargeM)
{
std::vector<int> Ms{127, 255, 312, 799, 1573};
constexpr int N = 256;
constexpr int K = 512;
constexpr int StrideA = K;
constexpr int StrideB = N;
constexpr int StrideC = N;
for(int M : Ms)
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
TYPED_TEST(TestGemmMX_MK_KN, Regular)
{
std::vector<int> Ms{3840};
constexpr int N = 512;
constexpr int K = 1024;
constexpr int StrideA = K;
constexpr int StrideB = N;
constexpr int StrideC = N;
for(int M : Ms)
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
TYPED_TEST(TestGemmMX_MK_KN, Large)
{
std::vector<std::pair<int, int>> test_sizes{{5120, 5120}, {3840, 5120}, {4096, 4096}};
constexpr int K = 4096;
constexpr int StrideA = K;
for(auto test_size : test_sizes)
{
auto M = test_size.first;
auto N = test_size.second;
const auto StrideB = N;
const auto StrideC = N;
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
}
/// A: ColMajor
/// B: ColMajor
/// C: RowMajor
TYPED_TEST(TestGemmMX_KM_NK, SmallN)
{
constexpr int M = 256;
std::vector<int> Ns{1, 2, 3, 4, 5, 6};
constexpr int K = 512;
constexpr int StrideA = M;
constexpr int StrideB = K;
for(int N : Ns)
{
const auto new_N = N * 8;
const auto StrideC = new_N;
this->Run(M, new_N, K, StrideA, StrideB, StrideC);
}
}
TYPED_TEST(TestGemmMX_KM_NK, MidLargeN)
{
constexpr int M = 256;
std::vector<int> Ns{127, 255, 312, 799, 1573};
constexpr int K = 512;
constexpr int StrideA = M;
constexpr int StrideB = K;
for(int N : Ns)
{
const auto new_N = (N + 7) / 8 * 8;
const auto StrideC = new_N;
this->Run(M, new_N, K, StrideA, StrideB, StrideC);
}
}
TYPED_TEST(TestGemmMX_KM_NK, Regular)
{
std::vector<int> Ms{3840};
constexpr int N = 512;
constexpr int K = 1024;
constexpr int StrideB = K;
constexpr int StrideC = N;
for(int M : Ms)
this->Run(M, N, K, M, StrideB, StrideC);
}
TYPED_TEST(TestGemmMX_KM_NK, Large)
{
std::vector<std::pair<int, int>> test_sizes{{5120, 5120}, {3840, 5120}, {4096, 4096}};
constexpr int K = 4096;
constexpr int StrideB = K;
for(auto test_size : test_sizes)
{
auto M = test_size.first;
auto N = test_size.second;
const auto StrideA = M;
const auto StrideC = N;
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
}