mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 05:01:25 +00:00
* Add trait to use a persistent kernel and split the entrypoints in grouped gemm
* Some helper functions for persistent kernel case
* Get max occupancy grid using device properties
* Implement tile loop in main entry point to grouped gemm
* Enable GridSize() on device
* Handle offset tile index using real current block index
* Add persistent kernel choice to grouped gemm example
* Use a for-loop for iterating over the group
* Reduce VGPR spills by early-exit
* Enable persistent kernel choice in grouped_gemm example
* Add persistent kernel option to grouped_gemm test
* Fix formatting with remod.py
* Remove GridUpdateBlocks as blocks are now iteratively computed
* Add comment about VGPR spilling
* Fix formatting
* Use CK_TILE_HOST instead of __host__
* Enable all Row/Col combinations in grouped gemm unit test
* Add some KBatch=2 cases to grouped gemm tests
* Fix SplitK for grouped gemm
* Enable pipeline hotloop/tailnumber selection in-kernel for grouped gemm
* Add type traits
* Split examples to regular and tileloop
* Formatting
* Use hipExtStreamGetCUMask to get current active CUs for the given stream
* Align test and example kernel config, and disable validation for splitk repeats
* Remove debug options from CMakeLists.txt
* Separate the code paths for persistent/non-persistent in test
* Fix formatting
* Address review comments

---------

Co-authored-by: Adam Osewski <19374865+aosewski@users.noreply.github.com>
38 lines
1.7 KiB
C++
38 lines
1.7 KiB
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#include <tuple>
|
|
|
|
#include "gtest/gtest.h"
|
|
|
|
#include "ck_tile/host.hpp"
|
|
#include "test_grouped_gemm_util.hpp"
|
|
|
|
// Short names for the element types that appear in the kernel configurations.
using F16 = ck_tile::half_t;
using F32 = float;

// Short names for the GEMM tensor layouts.
using Row = ck_tile::tensor_layout::gemm::RowMajor;
using Col = ck_tile::tensor_layout::gemm::ColumnMajor;

// Compile-time boolean tags; they fill the "Persistent" slot of each KernelTypes tuple.
using True  = ck_tile::bool_constant<true>;
using False = ck_tile::bool_constant<false>;
|
|
|
|
// clang-format off
|
|
// Type list for the typed test suite: one std::tuple per kernel configuration.
// Each A/B layout combination (C is row-major in every entry) appears exactly once per
// value of the Persistent tag, so no configuration is instantiated more than once.
using KernelTypes = ::testing::Types<
    //         ALayout, BLayout, CLayout, ADataType, BDataType, AccDataType, CDataType, Persistent
    std::tuple<    Row,     Col,     Row,       F16,       F16,         F32,       F16,    True>,
    std::tuple<    Row,     Col,     Row,       F16,       F16,         F32,       F16,   False>,

    std::tuple<    Col,     Col,     Row,       F16,       F16,         F32,       F16,    True>,
    std::tuple<    Col,     Col,     Row,       F16,       F16,         F32,       F16,   False>,

    std::tuple<    Row,     Row,     Row,       F16,       F16,         F32,       F16,    True>,
    std::tuple<    Row,     Row,     Row,       F16,       F16,         F32,       F16,   False>,

    std::tuple<    Col,     Row,     Row,       F16,       F16,         F32,       F16,    True>,
    std::tuple<    Col,     Row,     Row,       F16,       F16,         F32,       F16,   False>
    >;
|
|
// clang-format on
|
|
|
|
// Bind the KernelTypes list to the TestCkTileGroupedGemm typed test suite, so each typed
// test is instantiated once per tuple in KernelTypes.
// NOTE(review): the TestCkTileGroupedGemm fixture is not defined in this file; presumably it
// comes from test_grouped_gemm_util.hpp, and the test bodies from the .inc included below.
TYPED_TEST_SUITE(TestCkTileGroupedGemm, KernelTypes);
|
|
|
|
#include "test_grouped_gemm_ut_cases.inc"
|