mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 21:51:28 +00:00
173 implement device grouped gemm fixed nk for rdna4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Proposed changes This PR adds an RDNA4 implementation of the device_grouped_gemm_fixed_nk instance library using WMMA. The implementation is based on the existing DeviceGroupedGemm_Xdl_Fixed_NK design and reuses the same high-level structure, but replaces the XDL kernel with a WMMA-based one. It uses the GridwiseGemm_wmma_cshuffle_v3 kernel. At this stage, the focus is functional correctness and compatibility, not performance tuning. ## Technical Details - Device struct for grouped gemm fixed NK - Example code for the WMMA version - Unit tests for both the new WMMA implementation and the reference XDL code (previously missing) - Generic ck profiler interface with the purpose of calling unit tests. ## Checklist Please put an `x` into the boxes that apply. You can also fill these out after creating the PR. If you're not sure, please don't hesitate to ask. - [x] I have added tests relevant to the introduced functionality, and the unit tests are passing locally - [x] I have added the test to REGRESSION_TESTS list defined at the top of CMakeLists.txt in tests/CMakeLists.txt, **IF** the test takes more than 30 seconds to run. - [ ] I have added inline documentation which enables the maintainers to understand the motivation - [ ] I have removed the stale documentation which is no longer relevant after this pull request - [x] (If this change is user-facing) I have added release notes which provide the end users with a brief summary of the improvement from this pull request - [x] I have run `clang-format` on all changed files - [x] Any dependent changes have been merged ## Discussion If this is a relatively large or complex change, feel free to start a discussion by explaining why you chose the solution you did and what alternatives you considered
85 lines
2.4 KiB
C++
85 lines
2.4 KiB
C++
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
#include <tuple>
|
|
#include <vector>
|
|
|
|
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
|
#include "ck/utility/data_type.hpp"
|
|
#include "ck/utility/tuple.hpp"
|
|
|
|
#include "gtest/gtest.h"
|
|
#include "test_grouped_gemm_util.hpp"
|
|
|
|
// Command-line controlled test knobs; main() overwrites both when it is given two arguments.
// NOTE(review): presumably read by the shared test cases/utilities via extern — confirm.

// Bit mask taken from the first command-line argument (default: 0xffffff).
ck::index_t param_mask{0xffffff};

// Instance selector taken from the second command-line argument; -1 means "all".
ck::index_t instance_index{-1};
|
|
|
|
// Short names for the tensor layouts used in the kernel type list.
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;

// Short names for the element types used in the kernel type list.
using F16  = ck::half_t;
using BF16 = ck::bhalf_t;
using F8   = ck::f8_t;
using I8   = int8_t;
|
|
|
|
template <typename Tuple>
|
|
class TestGroupedGemm
|
|
: public ck::test::TestGroupedGemm<Tuple, false, ck::test::FixedNKGroupedGemmProfiler>
|
|
{
|
|
public:
|
|
void SetUp() override
|
|
{
|
|
ck::test::TestGroupedGemm<Tuple, false, ck::test::FixedNKGroupedGemmProfiler>::SetUp();
|
|
|
|
#if defined(CK_USE_WMMA)
|
|
// The old XDL tests didn't fail if instances were not supported, so we want to keep that
|
|
// behaviour When compiling WMMA instances and WMMA is supported, then we'll fail if a
|
|
// specific case is not supported
|
|
this->fail_if_no_supported_instances_ =
|
|
ck::is_gfx11_supported() || ck::is_gfx12_supported();
|
|
#endif
|
|
}
|
|
};
|
|
|
|
// clang-format off
|
|
using KernelTypes = ::testing::Types<
|
|
#if CK_USE_OCP_FP8 || CK_USE_FNUZ_FP8 || defined(CK_USE_FP8_ON_UNSUPPORTED_ARCH) || \
|
|
defined(CK_USE_WMMA_FP8)
|
|
ck::Tuple<Row, Row, Row, F16, F8, F16>,
|
|
ck::Tuple<Row, Col, Row, F16, F8, F16>,
|
|
#endif
|
|
|
|
ck::Tuple<Row, Row, Row, F16, F16, F16>,
|
|
ck::Tuple<Row, Col, Row, F16, F16, F16>,
|
|
|
|
ck::Tuple<Row, Row, Row, BF16, BF16, BF16>,
|
|
ck::Tuple<Row, Col, Row, BF16, BF16, BF16>,
|
|
ck::Tuple<Row, Row, Row, BF16, I8, BF16>,
|
|
ck::Tuple<Row, Col, Row, BF16, I8, BF16>,
|
|
|
|
ck::Tuple<Row, Row, Row, F16, I8, F16>,
|
|
ck::Tuple<Row, Col, Row, F16, I8, F16>
|
|
>;
|
|
|
|
// clang-format on
|
|
|
|
// Register the fixture as a typed test suite, instantiated once per entry in KernelTypes.
TYPED_TEST_SUITE(TestGroupedGemm, KernelTypes);

// Included after the suite registration above; judging by its name this file holds the
// fixed-NK test cases for the suite (its contents are not visible from here).
#include "test_grouped_gemm_fixed_nk_cases.inc"
|
|
int main(int argc, char** argv)
|
|
{
|
|
testing::InitGoogleTest(&argc, argv);
|
|
if(argc == 1) {}
|
|
else if(argc == 3)
|
|
{
|
|
param_mask = strtol(argv[1], nullptr, 0);
|
|
instance_index = atoi(argv[2]);
|
|
}
|
|
else
|
|
{
|
|
std::cout << "Usage of " << argv[0] << std::endl;
|
|
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
|
|
}
|
|
return RUN_ALL_TESTS();
|
|
}
|