mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 17:26:00 +00:00
* replace buffer_atomic with global_atomic * fixed global_atomic_add * added bf16 atomic_add * format * clang-format-12 * clean * clean * add guards * Update gtest.cmake * enabled splitk_gemm_multi_d * format * add ckProfiler * format * fixed naming * format * clean * clean * add guards * fix clang format * format * add kbatch printout * clean * Add rocm6.2 related gemm optimization * Limit bf16 atomic usage * remove redundant RCR gemm_universal instance * Add RRR fp8 gemm universal instance * Bug fix * Add GPU_TARGET guard to FP8/BF8 target * bug fix * update cmake * remove all fp8/bf8 example if arch not support * Enable fp8 RRR support in ckProfiler * limit greedy-reverse flag to gemm_universal in ckProfiler --------- Co-authored-by: Jing Zhang <jizhan@fb.com> Co-authored-by: Jing Zhang <jizhan@meta.com> Co-authored-by: zjing14 <zhangjing14@gmail.com> Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com> Co-authored-by: illsilin <Illia.Silin@amd.com>
70 lines
2.1 KiB
C++
70 lines
2.1 KiB
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#include <tuple>
|
|
|
|
#include "gtest/gtest.h"
|
|
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
|
#include "test_gemm_universal_util.hpp"
|
|
|
|
using F8 = ck::f8_t;
|
|
using F16 = ck::half_t;
|
|
using BF16 = ck::bhalf_t;
|
|
using F32 = float;
|
|
|
|
using Row = ck::tensor_layout::gemm::RowMajor;
|
|
using Col = ck::tensor_layout::gemm::ColumnMajor;
|
|
|
|
namespace {
|
|
|
|
template <typename X, typename Y>
|
|
struct tuple_concat;
|
|
|
|
template <typename... Xs, typename... Ys>
|
|
struct tuple_concat<std::tuple<Xs...>, std::tuple<Ys...>>
|
|
{
|
|
using type = std::tuple<Xs..., Ys...>;
|
|
};
|
|
|
|
} // namespace
|
|
|
|
template <typename Tuple>
|
|
class TestGemmUniversal_MK_KN
|
|
: public ck::test::TestGemmUniversal<typename tuple_concat<std::tuple<Row, Row>, Tuple>::type>
|
|
{
|
|
};
|
|
|
|
template <typename Tuple>
|
|
class TestGemmUniversal_MK_NK
|
|
: public ck::test::TestGemmUniversal<typename tuple_concat<std::tuple<Row, Col>, Tuple>::type>
|
|
{
|
|
};
|
|
|
|
// clang-format off
|
|
using KernelTypes_MK_KN = ::testing::Types<
|
|
// ADataType, BDataType, ComputeDataType, CDataType
|
|
std::tuple< F16, F16, F16, F16>,
|
|
#if (defined CK_ENABLE_FP8)
|
|
std::tuple< F16, F8, F16, F16>,
|
|
std::tuple< F8, F16, F16, F16>,
|
|
std::tuple< F8, F8, F8, BF16>,
|
|
#endif
|
|
std::tuple< BF16, BF16, BF16, BF16>
|
|
>;
|
|
using KernelTypes_MK_NK = ::testing::Types<
|
|
// ADataType, BDataType, ComputeDataType, CDataType
|
|
std::tuple< F16, F16, F16, F16>,
|
|
#if (defined CK_ENABLE_FP8)
|
|
std::tuple< F16, F8, F16, F16>,
|
|
std::tuple< F8, F16, F16, F16>,
|
|
std::tuple< F8, F8, F8, BF16>,
|
|
#endif
|
|
std::tuple< BF16, BF16, BF16, BF16>
|
|
>;
|
|
// clang-format on
|
|
|
|
TYPED_TEST_SUITE(TestGemmUniversal_MK_KN, KernelTypes_MK_KN);
|
|
TYPED_TEST_SUITE(TestGemmUniversal_MK_NK, KernelTypes_MK_NK);
|
|
|
|
#include "test_gemm_universal_ut_cases.inc"
|