mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-03-23 16:47:40 +00:00
* wip: grouped_gemm implementation based on wmma kernel + example for fp16 * chore: clean up grouped_gemm_wmma_splitk_fp16 example * chore: add cmake options to fully disable XDL or WMMA kernels * feat: add tests for grouped gemm wmma instances for f16 and bf16 (all layouts) * chore: add grouped gemm wmma bf16 example * refactor: reuse more code between instance factory functions * chore: turn test failure if not all batch sizes are supported into a warning * chore: made failing of test on unsupported instances conditional to not break old tests * chore: add log message to failure case where AK1/BK1/KBatch is too high for K value * fix: issue with new overloads of GridwiseGemm_wmma_cshuffle_v3::Run() * fix: stray comma after parameter list * fix: compilation issues on RDNA3 and tests failing due to unsupported problems still being run * chore: update copyright in header comments * nit: minor feedback * refactor: unified XDL / WMMA tests * fix: properly disable FP8 instances when ONLY targeting gfx11 * refactor: add v3 suffix to grouped_gemm device struct name * fix: small typos in example code * fix: fully exclude xdl/wmma instances when using the corresponding cmake flags * chore: remove unused destructor and added pipeline support checks to remove unnecessary paths * fix: make sure to not add instance library to group if library was skipped * fix: make sure xdl grouped gemm doesn't fail the new test * fix: explicitly exclude test if no xdl/wmma support, as pattern matching fails in this case * fix: examples not working since dependent types and functions were moved to ck namespace in develop * fix: tests failing when compiling for just gfx11 due to trying to run unsupported instances * chore: replace/add copyright headers with new format
91 lines
2.7 KiB
C++
91 lines
2.7 KiB
C++
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
#include <tuple>
|
|
#include <vector>
|
|
|
|
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
|
#include "ck/utility/data_type.hpp"
|
|
|
|
#include "gtest/gtest.h"
|
|
#include "test_grouped_gemm_util.hpp"
|
|
|
|
// Test-selection settings. main() overwrites both when exactly two extra command-line
// arguments are given (argv[1] -> param_mask, argv[2] -> instance_index).
// NOTE(review): how param_mask is interpreted is decided by the code that reads it, which
// is not in this file — confirm against the shared test cases/utilities.
ck::index_t param_mask{0xffffff};

// -1 means "all instances" according to the usage text printed by main().
ck::index_t instance_index{-1};
|
|
|
|
// Short names for the GEMM tensor layout tags.
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;

// Short names for the element data types used in the kernel type list.
using F16  = ck::half_t;
using BF16 = ck::bhalf_t;
using F8   = ck::f8_t;
using I8   = int8_t;
|
|
|
|
template <typename Tuple>
|
|
class TestGroupedGemm : public ck::test::TestGroupedGemm<Tuple>
|
|
{
|
|
public:
|
|
void SetUp() override
|
|
{
|
|
ck::test::TestGroupedGemm<Tuple>::SetUp();
|
|
|
|
#if defined(CK_USE_WMMA)
|
|
// The old XDL tests didn't fail if instances were not supported, so we want to keep that
|
|
// behaviour When compiling WMMA instances and WMMA is supported, then we'll fail if a
|
|
// specific case is not supported
|
|
this->fail_if_no_supported_instances_ =
|
|
ck::is_gfx11_supported() || ck::is_gfx12_supported();
|
|
#endif
|
|
}
|
|
};
|
|
|
|
// clang-format off
|
|
using KernelTypes = ::testing::Types<
|
|
|
|
#if defined(CK_USE_WMMA)
|
|
// WWMA only. No reason to not have it for XDL, but the instance was not defined and it was not in the original test.
|
|
std::tuple< Col, Col, Row, BF16, BF16, BF16>,
|
|
#endif
|
|
|
|
#if defined(CK_USE_XDL) && defined(__gfx9__)
|
|
// XDL only at the moment, instances for WMMA not defined
|
|
std::tuple< Row, Row, Row, BF16, I8, BF16>,
|
|
std::tuple< Row, Col, Row, BF16, I8, BF16>,
|
|
#endif
|
|
|
|
#if (defined(CK_USE_XDL) && (defined(__gfx9__) || defined(__gfx12__))) || (defined(CK_USE_WMMA) && defined(__gfx12__))
|
|
std::tuple< Row, Row, Row, F8, F16, F16>,
|
|
std::tuple< Row, Row, Row, F16, F8, F16>,
|
|
#endif
|
|
|
|
std::tuple< Row, Row, Row, F16, F16, F16>,
|
|
std::tuple< Row, Col, Row, F16, F16, F16>,
|
|
std::tuple< Col, Row, Row, F16, F16, F16>,
|
|
std::tuple< Col, Col, Row, F16, F16, F16>,
|
|
|
|
std::tuple< Row, Row, Row, BF16, BF16, BF16>,
|
|
std::tuple< Row, Col, Row, BF16, BF16, BF16>,
|
|
std::tuple< Col, Row, Row, BF16, BF16, BF16>
|
|
>;
|
|
// clang-format on
|
|
|
|
TYPED_TEST_SUITE(TestGroupedGemm, KernelTypes);
|
|
|
|
#include "test_grouped_gemm_ut_cases.inc"
|
|
int main(int argc, char** argv)
|
|
{
|
|
testing::InitGoogleTest(&argc, argv);
|
|
if(argc == 1) {}
|
|
else if(argc == 3)
|
|
{
|
|
param_mask = strtol(argv[1], nullptr, 0);
|
|
instance_index = atoi(argv[2]);
|
|
}
|
|
else
|
|
{
|
|
std::cout << "Usage of " << argv[0] << std::endl;
|
|
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
|
|
}
|
|
return RUN_ALL_TESTS();
|
|
}
|