Files
composable_kernel/test/gemm_ab_scale/test_gemm_ab_scale.cpp
Enrico Degregori ce99cab605 Wmma support for gemm_ab_scale (#3314)
* Support gemm_ab_scale:

 - Add tests
 - Integrate scaling implementation in multiple D
 - Generalize existing b_scale for ab_scale
 - Add instances
 - Generalize implementation for ScaleBlockM, ScaleBlockN, ScaleBlockK
 - Add support for all layouts supported by xdl
 - Fix splitk xdl

* Fix copyright

* Wmma support for gemm_blockscale_wp (#3315)

* Support for  preshuffle with ab scale

 - add support for b preshuffle in GridwiseGemm_wmma_cshuffle_v3_ab_scale
 - add support for AScaleLayout amnd BScaleLayout (can be different
   from ALayout and BLayout, respectively)
 - add Run method in v1 pipeline to support preshuffle + scaling
 - add support for preshuffle gemms in common invoker
 - Add splitk support

* Fix copyright header
2025-12-11 09:06:20 +01:00

237 lines
5.3 KiB
C++

// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <tuple>
#include "gtest/gtest.h"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "test_gemm_ab_scale_util.hpp"
using BF16 = ck::bhalf_t;
using F32 = float;
using F8 = ck::f8_t;
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
namespace {
template <typename X, typename Y>
struct tuple_concat;
template <typename... Xs, typename... Ys>
struct tuple_concat<std::tuple<Xs...>, std::tuple<Ys...>>
{
using type = std::tuple<Xs..., Ys...>;
};
} // namespace
template <typename Tuple>
class TestGemmABScale_MK_NK : public ck::test::TestGemmABScale<
typename tuple_concat<std::tuple<Row, Col, Row>, Tuple>::type>
{
};
template <typename Tuple>
class TestGemmABScale_MK_KN : public ck::test::TestGemmABScale<
typename tuple_concat<std::tuple<Row, Row, Row>, Tuple>::type>
{
};
template <typename Tuple>
class TestGemmABScale_KM_KN : public ck::test::TestGemmABScale<
typename tuple_concat<std::tuple<Col, Row, Row>, Tuple>::type>
{
};
// clang-format off
using KernelTypes = ::testing::Types<
// ADataType, BDataType, ComputeDataType, EDataType
std::tuple< F8, F32, F8, F32, F8, BF16>
>;
// clang-format on
TYPED_TEST_SUITE(TestGemmABScale_MK_NK, KernelTypes);
TYPED_TEST_SUITE(TestGemmABScale_MK_KN, KernelTypes);
TYPED_TEST_SUITE(TestGemmABScale_KM_KN, KernelTypes);
// Row Col
TYPED_TEST(TestGemmABScale_MK_NK, SmallM)
{
std::vector<int> Ms{1, 2, 3, 4, 5, 6};
constexpr int N = 512;
constexpr int K = 1024;
constexpr int StrideA = K;
constexpr int StrideB = K;
constexpr int StrideC = N;
for(int M : Ms)
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
TYPED_TEST(TestGemmABScale_MK_NK, SmallMPadK)
{
std::vector<int> Ms{1, 2, 3, 4, 5, 6};
constexpr int N = 512;
constexpr int K = 704;
constexpr int StrideA = K;
constexpr int StrideB = K;
constexpr int StrideC = N;
for(int M : Ms)
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
TYPED_TEST(TestGemmABScale_MK_NK, MidLargeM)
{
std::vector<int> Ms{127, 255, 312, 799, 1573};
constexpr int N = 512;
constexpr int K = 1024;
constexpr int StrideA = K;
constexpr int StrideB = K;
constexpr int StrideC = N;
for(int M : Ms)
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
TYPED_TEST(TestGemmABScale_MK_NK, Regular)
{
std::vector<int> Ms{512};
constexpr int N = 512;
constexpr int K = 1024;
constexpr int StrideA = K;
constexpr int StrideB = K;
constexpr int StrideE = N;
for(int M : Ms)
this->Run(M, N, K, StrideA, StrideB, StrideE);
}
// Row Row
TYPED_TEST(TestGemmABScale_MK_KN, SmallM)
{
std::vector<int> Ms{1, 2, 3, 4, 5, 6};
constexpr int N = 512;
constexpr int K = 1024;
constexpr int StrideA = K;
constexpr int StrideB = N;
constexpr int StrideC = N;
for(int M : Ms)
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
TYPED_TEST(TestGemmABScale_MK_KN, SmallMPadK)
{
std::vector<int> Ms{1, 2, 3, 4, 5, 6};
constexpr int N = 512;
constexpr int K = 704;
constexpr int StrideA = K;
constexpr int StrideB = N;
constexpr int StrideC = N;
for(int M : Ms)
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
TYPED_TEST(TestGemmABScale_MK_KN, MidLargeM)
{
std::vector<int> Ms{127, 255, 312, 799, 1573};
constexpr int N = 512;
constexpr int K = 1024;
constexpr int StrideA = K;
constexpr int StrideB = N;
constexpr int StrideC = N;
for(int M : Ms)
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
TYPED_TEST(TestGemmABScale_MK_KN, Regular)
{
std::vector<int> Ms{512};
constexpr int N = 512;
constexpr int K = 1024;
constexpr int StrideA = K;
constexpr int StrideB = N;
constexpr int StrideE = N;
for(int M : Ms)
this->Run(M, N, K, StrideA, StrideB, StrideE);
}
// Col Row
TYPED_TEST(TestGemmABScale_KM_KN, SmallM)
{
std::vector<int> Ms{16, 32};
constexpr int N = 512;
constexpr int K = 1024;
constexpr int StrideB = N;
constexpr int StrideC = N;
for(int M : Ms)
{
int StrideA = M;
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
}
TYPED_TEST(TestGemmABScale_KM_KN, SmallMPadK)
{
std::vector<int> Ms{16, 32};
constexpr int N = 512;
constexpr int K = 704;
constexpr int StrideB = N;
constexpr int StrideC = N;
for(int M : Ms)
{
int StrideA = M;
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
}
TYPED_TEST(TestGemmABScale_KM_KN, MidLargeM)
{
std::vector<int> Ms{128, 256};
constexpr int N = 512;
constexpr int K = 1024;
constexpr int StrideB = N;
constexpr int StrideC = N;
for(int M : Ms)
{
int StrideA = M;
this->Run(M, N, K, StrideA, StrideB, StrideC);
}
}
TYPED_TEST(TestGemmABScale_KM_KN, Regular)
{
std::vector<int> Ms{512};
constexpr int N = 512;
constexpr int K = 1024;
constexpr int StrideB = N;
constexpr int StrideE = N;
for(int M : Ms)
{
int StrideA = M;
this->Run(M, N, K, StrideA, StrideB, StrideE);
}
}