mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 01:10:17 +00:00
* Prepare files for DeviceGemm_Wmma_CShuffleV3
* Implement main part of CShuffleV3 with block pipeline v3 for WMMA
* Remove unused functions and template params for A/B descriptors
* Support both gfx11 and gfx12
* Enable SplitK for gfx12 and disable for gfx11
* Added RowColRow layout for DeviceGemmV2 fp16
* Added more instances for Row, Col, Row data layout
* Added instances for DeviceGemm_Wmma_CShuffleV3, Col, Row, Row data layout
* Added instances for DeviceGemm_Wmma_CShuffleV3, Col, Col, Row data layout
* Added more instances for DeviceGemm_Wmma_CShuffleV3, Row, Row, Row data layout
* Fix formatting
* Add documentation
Based on e5ad48a784
* Enable gemm_universal profiling for gfx11/12
* Add WMMA intrinsics for F8/BF8
* Support F8/BF8 DeviceGemm_Wmma_CShuffleV3, add basic instances
* Add BF16 instances and tests
* Fix test_gemm_universal_wmma_fp8 by adding CK_USE_WMMA_FP8
---------
Co-authored-by: Anca Hamuraru <anca@streamhpc.com>
81 lines
2.3 KiB
C++
81 lines
2.3 KiB
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#include <tuple>
|
|
|
|
#include "gtest/gtest.h"
|
|
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
|
#include "test_gemm_universal_util.hpp"
|
|
|
|
using BF16 = ck::bhalf_t;
|
|
using F32 = float;
|
|
|
|
using Row = ck::tensor_layout::gemm::RowMajor;
|
|
using Col = ck::tensor_layout::gemm::ColumnMajor;
|
|
|
|
namespace {
|
|
|
|
template <typename X, typename Y>
|
|
struct tuple_concat;
|
|
|
|
template <typename... Xs, typename... Ys>
|
|
struct tuple_concat<std::tuple<Xs...>, std::tuple<Ys...>>
|
|
{
|
|
using type = std::tuple<Xs..., Ys...>;
|
|
};
|
|
|
|
} // namespace
|
|
|
|
template <typename Tuple>
|
|
class TestGemmUniversal_BF16_MK_KN
|
|
: public ck::test::TestGemmUniversal<typename tuple_concat<std::tuple<Row, Row>, Tuple>::type>
|
|
{
|
|
};
|
|
|
|
template <typename Tuple>
|
|
class TestGemmUniversal_BF16_MK_NK
|
|
: public ck::test::TestGemmUniversal<typename tuple_concat<std::tuple<Row, Col>, Tuple>::type>
|
|
{
|
|
};
|
|
|
|
template <typename Tuple>
|
|
class TestGemmUniversal_BF16_KM_KN
|
|
: public ck::test::TestGemmUniversal<typename tuple_concat<std::tuple<Col, Row>, Tuple>::type>
|
|
{
|
|
};
|
|
|
|
template <typename Tuple>
|
|
class TestGemmUniversal_BF16_KM_NK
|
|
: public ck::test::TestGemmUniversal<typename tuple_concat<std::tuple<Col, Col>, Tuple>::type>
|
|
{
|
|
};
|
|
|
|
// clang-format off
|
|
using KernelTypes_MK_KN = ::testing::Types<
|
|
// ADataType, BDataType, ComputeDataType, CDataType
|
|
std::tuple< BF16, BF16, BF16, BF16>
|
|
>;
|
|
|
|
using KernelTypes_MK_NK = ::testing::Types<
|
|
// ADataType, BDataType, ComputeDataType, CDataType
|
|
std::tuple< BF16, BF16, BF16, BF16>
|
|
>;
|
|
|
|
using KernelTypes_KM_KN = ::testing::Types<
|
|
// ADataType, BDataType, ComputeDataType, CDataType
|
|
std::tuple< BF16, BF16, BF16, BF16>
|
|
>;
|
|
|
|
using KernelTypes_KM_NK = ::testing::Types<
|
|
// ADataType, BDataType, ComputeDataType, CDataType
|
|
std::tuple< BF16, BF16, BF16, BF16>
|
|
>;
|
|
// clang-format on
|
|
|
|
TYPED_TEST_SUITE(TestGemmUniversal_BF16_MK_KN, KernelTypes_MK_KN);
|
|
TYPED_TEST_SUITE(TestGemmUniversal_BF16_MK_NK, KernelTypes_MK_NK);
|
|
TYPED_TEST_SUITE(TestGemmUniversal_BF16_KM_KN, KernelTypes_KM_KN);
|
|
TYPED_TEST_SUITE(TestGemmUniversal_BF16_KM_NK, KernelTypes_KM_NK);
|
|
|
|
#include "test_gemm_universal_ut_cases_bf16.inc"
|