mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-18 12:00:07 +00:00
Multiple fixes to GroupedGemm+SplitK (#707)
* Add license header.
* Reduce number of logged output. Add constant initialization.
* Add functional tests for grouped_gemm with different kbatch value.
* Add debug log informations + remove unused code.
* Don't pass kbatch to CalculateKPadded.
* Turn on logging in grouped gemm and gemm splitk profiler
* Debug: limit number of test cases to run;
* Log more information and initialize with constant value.
* Turn on DEBUG_LOG
* Add more debug log informations.
* Limit the number of instances to compile.
* Use GridwiseGemmPipeline
* Use KBatch to calculate K0
* Multiple DebugLog messages.
* Unit tests for multiple KBatch values.
* Refactoring
* Disable logging
* extract out of if statement KBatch update.
* Uncomment instances.
* Disable DebugLog.
* Use Kbatch when calculate KPadded.
* Fix CGridDesc padding.
* Use available helper functions.
* Uncomment code commented for debuggin.
* Remove unnecessary debug log messages.
* Uncomment previously commented code for debug purposes.
* Add KBatch info to profiler output summary log.
* Add gtests for gemm splitk using ckProfiler API.
* Add more test-cases for different data layout.
* Add more test cases for gemm splitk
* Remove old test.
* Unit tests for MKNK ggemm interface.
* Fix and add more unit-tests.
* Constepxr everything!
* Increase error threshold for fp16 and splitk.
Since we're using fp16 atomic add for splitk there's a
known precision loss.
---------
Co-authored-by: Adam Osewski <aosewski@amd.com>
Co-authored-by: zjing14 <zhangjing14@gmail.com>
[ROCm/composable_kernel commit: 70e4eb567f]
This commit is contained in:
34
test/grouped_gemm/test_grouped_gemm_splitk.cpp
Normal file
34
test/grouped_gemm/test_grouped_gemm_splitk.cpp
Normal file
@@ -0,0 +1,34 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
#include "ck/utility/data_type.hpp"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "test_grouped_gemm_util.hpp"
|
||||
|
||||
using F16 = ck::half_t;
|
||||
using Row = ck::tensor_layout::gemm::RowMajor;
|
||||
using Col = ck::tensor_layout::gemm::ColumnMajor;
|
||||
|
||||
using RRR_F16_F16_F16 = ck::test::TestGroupedGemm<std::tuple<Row, Row, Row, F16, F16, F16>>;
|
||||
using RCR_F16_F16_F16 = ck::test::TestGroupedGemm<std::tuple<Row, Col, Row, F16, F16, F16>>;
|
||||
|
||||
using RRR_F16_F16_F16_LargeK = ck::test::TestGroupedGemm<std::tuple<Row, Row, Row, F16, F16, F16>>;
|
||||
using RCR_F16_F16_F16_LargeK = ck::test::TestGroupedGemm<std::tuple<Row, Col, Row, F16, F16, F16>>;
|
||||
|
||||
const std::vector<int> KBATCH{1, 2, 3, 5, 8};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(TestGroupedGemm_splitk_MK_KN, RRR_F16_F16_F16, testing::ValuesIn(KBATCH));
|
||||
INSTANTIATE_TEST_SUITE_P(TestGroupedGemm_splitk_MK_NK, RCR_F16_F16_F16, testing::ValuesIn(KBATCH));
|
||||
INSTANTIATE_TEST_SUITE_P(TestGroupedGemm_splitk_LargeK_MK_KN,
|
||||
RRR_F16_F16_F16_LargeK,
|
||||
testing::Values(32, 64));
|
||||
INSTANTIATE_TEST_SUITE_P(TestGroupedGemm_splitk_LargeK_MK_NK,
|
||||
RCR_F16_F16_F16_LargeK,
|
||||
testing::Values(32, 64));
|
||||
|
||||
#include "test_grouped_gemm_ut_cases.inc"
|
||||
Reference in New Issue
Block a user