Files
composable_kernel/test/gemm_universal/test_gemm_universal_wmma_bf16.cpp
Anton Gorenko edd92fc546 DeviceGemm_Wmma_CShuffleV3 with BlockGemmPipelineVersion::v3 (#2096)
* Prepare files for DeviceGemm_Wmma_CShuffleV3

* Implement main part of CShuffleV3 with block pipeline v3 for WMMA

* Remove unused functions and template params for A/B descriptors

* Support both gfx11 and gfx12

* Enable SplitK for gfx12 and disable for gfx11

* Added RowColRow layout for DeviceGemmV2 fp16

* Added more instances for Row, Col, Row data layout

* Added instances for DeviceGemm_Wmma_CShuffleV3, Col, Row, Row data layout

* Added instances for DeviceGemm_Wmma_CShuffleV3, Col, Col, Row data layout

* Added more instances for DeviceGemm_Wmma_CShuffleV3, Row, Row, Row data layout

* Fix formatting

* Add documentation

Based on e5ad48a784

* Enable gemm_universal profiling for gfx11/12

* Add WMMA intrinsics for F8/BF8

* Support F8/BF8 DeviceGemm_Wmma_CShuffleV3, add basic instances

* Add BF16 instances and tests

* Fix test_gemm_universal_wmma_fp8 by adding CK_USE_WMMA_FP8

---------

Co-authored-by: Anca Hamuraru <anca@streamhpc.com>
2025-04-28 10:14:21 +05:00

81 lines
2.3 KiB
C++

// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple>
#include "gtest/gtest.h"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "test_gemm_universal_util.hpp"
using BF16 = ck::bhalf_t;
using F32 = float;
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
namespace {
template <typename X, typename Y>
struct tuple_concat;
template <typename... Xs, typename... Ys>
struct tuple_concat<std::tuple<Xs...>, std::tuple<Ys...>>
{
using type = std::tuple<Xs..., Ys...>;
};
} // namespace
template <typename Tuple>
class TestGemmUniversal_BF16_MK_KN
: public ck::test::TestGemmUniversal<typename tuple_concat<std::tuple<Row, Row>, Tuple>::type>
{
};
template <typename Tuple>
class TestGemmUniversal_BF16_MK_NK
: public ck::test::TestGemmUniversal<typename tuple_concat<std::tuple<Row, Col>, Tuple>::type>
{
};
template <typename Tuple>
class TestGemmUniversal_BF16_KM_KN
: public ck::test::TestGemmUniversal<typename tuple_concat<std::tuple<Col, Row>, Tuple>::type>
{
};
template <typename Tuple>
class TestGemmUniversal_BF16_KM_NK
: public ck::test::TestGemmUniversal<typename tuple_concat<std::tuple<Col, Col>, Tuple>::type>
{
};
// clang-format off
using KernelTypes_MK_KN = ::testing::Types<
// ADataType, BDataType, ComputeDataType, CDataType
std::tuple< BF16, BF16, BF16, BF16>
>;
using KernelTypes_MK_NK = ::testing::Types<
// ADataType, BDataType, ComputeDataType, CDataType
std::tuple< BF16, BF16, BF16, BF16>
>;
using KernelTypes_KM_KN = ::testing::Types<
// ADataType, BDataType, ComputeDataType, CDataType
std::tuple< BF16, BF16, BF16, BF16>
>;
using KernelTypes_KM_NK = ::testing::Types<
// ADataType, BDataType, ComputeDataType, CDataType
std::tuple< BF16, BF16, BF16, BF16>
>;
// clang-format on
TYPED_TEST_SUITE(TestGemmUniversal_BF16_MK_KN, KernelTypes_MK_KN);
TYPED_TEST_SUITE(TestGemmUniversal_BF16_MK_NK, KernelTypes_MK_NK);
TYPED_TEST_SUITE(TestGemmUniversal_BF16_KM_KN, KernelTypes_KM_KN);
TYPED_TEST_SUITE(TestGemmUniversal_BF16_KM_NK, KernelTypes_KM_NK);
#include "test_gemm_universal_ut_cases_bf16.inc"