mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-18 20:09:25 +00:00
* replace buffer_atomic with global_atomic
* fixed global_atomic_add
* added bf16 atomic_add
* format
* clang-format-12
* clean
* clean
* add guards
* Update gtest.cmake
* enabled splitk_gemm_multi_d
* format
* add ckProfiler
* format
* fixed naming
* format
* clean
* clean
* add guards
* fix clang format
* format
* add kbatch printout
* clean
* Add rocm6.2 related gemm optimization
* Limit bf16 atomic usage
* remove redundant RCR gemm_universal instance
* Add RRR fp8 gemm universal instance
* Bug fix
* Add GPU_TARGET guard to FP8/BF8 target
* bug fix
* update cmake
* remove all fp8/bf8 examples if the arch does not support them
* Enable fp8 RRR support in ckProfiler
* limit greedy-reverse flag to gemm_universal in ckProfiler
---------
Co-authored-by: Jing Zhang <jizhan@fb.com>
Co-authored-by: Jing Zhang <jizhan@meta.com>
Co-authored-by: zjing14 <zhangjing14@gmail.com>
Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
Co-authored-by: illsilin <Illia.Silin@amd.com>
[ROCm/composable_kernel commit: 3049b5467c]
70 lines
2.1 KiB
C++
70 lines
2.1 KiB
C++
// SPDX-License-Identifier: MIT
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#include <tuple>
|
|
|
|
#include "gtest/gtest.h"
|
|
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
|
#include "test_gemm_universal_util.hpp"
|
|
|
|
// Short names for the element types that appear in the kernel type lists.
using F8   = ck::f8_t;
using F16  = ck::half_t;
using BF16 = ck::bhalf_t;
using F32  = float;

// Short names for the GEMM tensor layout tags.
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
|
|
|
|
namespace {

/// Primary template, declared but never defined: only the
/// std::tuple / std::tuple partial specialization below provides `type`,
/// so using any other argument types is a compile-time error.
template <typename X, typename Y>
struct tuple_concat;

/// Joins the element types of two std::tuple types into one tuple type.
///
/// tuple_concat<std::tuple<A, B>, std::tuple<C>>::type is std::tuple<A, B, C>.
template <typename... Xs, typename... Ys>
struct tuple_concat<std::tuple<Xs...>, std::tuple<Ys...>>
{
    using type = std::tuple<Xs..., Ys...>;
};

} // namespace
|
|
|
|
template <typename Tuple>
|
|
class TestGemmUniversal_MK_KN
|
|
: public ck::test::TestGemmUniversal<typename tuple_concat<std::tuple<Row, Row>, Tuple>::type>
|
|
{
|
|
};
|
|
|
|
template <typename Tuple>
|
|
class TestGemmUniversal_MK_NK
|
|
: public ck::test::TestGemmUniversal<typename tuple_concat<std::tuple<Row, Col>, Tuple>::type>
|
|
{
|
|
};
|
|
|
|
// clang-format off
|
|
// Type list bound to the TestGemmUniversal_MK_KN suite; one typed-test
// instantiation per tuple. The combinations involving F8 are only compiled
// when CK_ENABLE_FP8 is defined.
using KernelTypes_MK_KN = ::testing::Types<
    //         ADataType, BDataType, ComputeDataType, CDataType
    std::tuple<      F16,       F16,             F16,       F16>,
#if (defined CK_ENABLE_FP8)
    std::tuple<      F16,        F8,             F16,       F16>,
    std::tuple<       F8,       F16,             F16,       F16>,
    std::tuple<       F8,        F8,              F8,      BF16>,
#endif
    std::tuple<     BF16,      BF16,            BF16,      BF16>
    >;
|
|
// Type list bound to the TestGemmUniversal_MK_NK suite; one typed-test
// instantiation per tuple. The combinations involving F8 are only compiled
// when CK_ENABLE_FP8 is defined.
using KernelTypes_MK_NK = ::testing::Types<
    //         ADataType, BDataType, ComputeDataType, CDataType
    std::tuple<      F16,       F16,             F16,       F16>,
#if (defined CK_ENABLE_FP8)
    std::tuple<      F16,        F8,             F16,       F16>,
    std::tuple<       F8,       F16,             F16,       F16>,
    std::tuple<       F8,        F8,              F8,      BF16>,
#endif
    std::tuple<     BF16,      BF16,            BF16,      BF16>
    >;
|
|
// clang-format on
|
|
|
|
// Bind each fixture to its list of data-type tuples.
TYPED_TEST_SUITE(TestGemmUniversal_MK_KN, KernelTypes_MK_KN);
TYPED_TEST_SUITE(TestGemmUniversal_MK_NK, KernelTypes_MK_NK);

// Included only after both suites are declared.
// NOTE(review): presumably this file holds the TYPED_TEST bodies for the
// suites above -- confirm.
#include "test_gemm_universal_ut_cases.inc"
|