Added WMMA support for gemm quantization (#2841)

- profiler for gemm quantization for DL/XDL
- tests for gemm quantization for DL/XDL
- implementation for gemm quantization for WMMA
- profiler/tests for gemm quantization for WMMA (a reference sketch of the requantized GEMM follows the commit header below)

Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
Author: Wojciech Laskowski
Date: 2025-09-17 01:23:29 +02:00
Committed by: GitHub
Parent: 2723dbd332
Commit: f97b2a3f5d

21 changed files with 1167 additions and 8 deletions
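The new tests and profiler exercise a requantized int8 GEMM: A and B are int8, products are accumulated in int32, and the accumulator is scaled back down to int8 by a requantization factor. A minimal host-side reference sketch of that convention (reference_quant_gemm and its argument layout are illustrative assumptions, not CK API):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Hedged sketch: int8 x int8 -> int32 accumulate, then requantize to int8.
// Row-major A (MxK), B (KxN), E (MxN); e must be sized M * N by the caller.
void reference_quant_gemm(const std::vector<int8_t>& a,
                          const std::vector<int8_t>& b,
                          std::vector<int8_t>& e,
                          int M, int N, int K, float requant_scale)
{
    for(int m = 0; m < M; ++m)
        for(int n = 0; n < N; ++n)
        {
            int32_t acc = 0; // AccDataType = int32 avoids overflow of int8 products
            for(int k = 0; k < K; ++k)
                acc += int32_t(a[m * K + k]) * int32_t(b[k * N + n]);

            // Requantize: scale the int32 accumulator, round, and saturate to int8.
            float scaled   = requant_scale * float(acc);
            e[m * N + n]   = static_cast<int8_t>(std::clamp(std::lround(scaled), -128L, 127L));
        }
}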

@@ -0,0 +1,2 @@
add_custom_target(test_quantization)
add_subdirectory(gemm)

@@ -0,0 +1,9 @@
add_custom_target(test_gemm_quantization_targets)
add_gtest_executable(test_gemm_quantization test_gemm_quantization.cpp)
# `result` comes from add_gtest_executable above; 0 indicates the gtest target was created.
if(result EQUAL 0)
    target_link_libraries(test_gemm_quantization PRIVATE utility device_quantization_instance)
    add_dependencies(test_gemm_quantization_targets test_gemm_quantization)
endif()
add_dependencies(test_quantization test_gemm_quantization_targets)

@@ -0,0 +1,40 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "ck/ck.hpp"
#include "profiler/profile_gemm_quantization_impl.hpp"
#include "test_gemm_quantization_util.hpp"
using I8 = int8_t;
using I32 = int32_t;
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
template <typename Tuple>
class TestGemmQuantization : public ck::test::TestGemmQuantizationCommon<Tuple>
{
protected:
using ProfileCall = bool (*const)(int, int, bool, bool, int, int, int, int, int, int, float);
ProfileCall GetImpl() override
{
return &ck::profiler::profile_gemm_quantization_impl<
typename ck::test::TestGemmQuantizationCommon<Tuple>::ADataType,
typename ck::test::TestGemmQuantizationCommon<Tuple>::BDataType,
typename ck::test::TestGemmQuantizationCommon<Tuple>::AccDataType,
typename ck::test::TestGemmQuantizationCommon<Tuple>::EDataType,
typename ck::test::TestGemmQuantizationCommon<Tuple>::ALayout,
typename ck::test::TestGemmQuantizationCommon<Tuple>::BLayout,
typename ck::test::TestGemmQuantizationCommon<Tuple>::ELayout>;
}
};
using KernelTypes = ::testing::Types<std::tuple<I8, I8, I32, I8, Row, Row, Row>,
std::tuple<I8, I8, I32, I8, Row, Col, Row>,
std::tuple<I8, I8, I32, I8, Col, Row, Row>,
std::tuple<I8, I8, I32, I8, Col, Col, Row>>;
TYPED_TEST_SUITE(TestGemmQuantization, KernelTypes);
#include "test_gemm_quantization_ut_cases.inc"

@@ -0,0 +1,41 @@
#pragma once

TYPED_TEST(TestGemmQuantization, SmallM)
{
    std::vector<int> Ms{1, 2, 3, 4, 5, 6};
    constexpr int N = 512;
    constexpr int K = 320;

    for(int M : Ms)
        this->Run({{M, N, K}});
}

TYPED_TEST(TestGemmQuantization, MidLargeM)
{
    std::vector<int> Ms{127, 255, 312, 799, 1573};
    constexpr int N = 1024;
    constexpr int K = 320;

    for(int M : Ms)
        this->Run({{M, N, K}});
}

TYPED_TEST(TestGemmQuantization, MNKPadded)
{
    const std::vector<int> Ms{127, 150, 188, 210};
    constexpr int N = 136;
    constexpr int K = 280;

    for(int M : Ms)
        this->Run({{M, N, K}});
}

TYPED_TEST(TestGemmQuantization, Regular)
{
    constexpr int M = 512;
    constexpr int N = 512;
    std::vector<int> Ks{512};

    for(int K : Ks)
        this->Run({{M, N, K}});
}
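Each case above is instantiated once per tuple in KernelTypes, so googletest's standard naming for typed tests applies; a single layout/test combination can be selected with --gtest_filter (the binary name is taken from the CMakeLists above):

./test_gemm_quantization --gtest_filter='TestGemmQuantization/0.SmallM'  # tuple 0: Row/Row/Row
./test_gemm_quantization --gtest_filter='*MNKPadded*'                    # padded sizes, all layouts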

@@ -0,0 +1,62 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <cstdint>
#include <tuple>
#include <vector>

#include "gtest/gtest.h"

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/utility/data_type.hpp"

using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
using I8  = int8_t;
using I32 = int32_t;

namespace ck {
namespace test {

using TestMatrixSizes = std::vector<std::vector<ck::index_t>>;

static const TestMatrixSizes DefaultTestMatrixSizes = {
    {16, 32, 64}, {512, 2048, 4096}, {2048, 1024, 16}};

template <typename Tuple>
class TestGemmQuantizationCommon : public ::testing::Test
{
    protected:
    using ADataType   = std::tuple_element_t<0, Tuple>;
    using BDataType   = std::tuple_element_t<1, Tuple>;
    using AccDataType = std::tuple_element_t<2, Tuple>;
    using EDataType   = std::tuple_element_t<3, Tuple>;
    using ALayout     = std::tuple_element_t<4, Tuple>;
    using BLayout     = std::tuple_element_t<5, Tuple>;
    using ELayout     = std::tuple_element_t<6, Tuple>;

    using ProfileCall = bool (*const)(int, int, bool, bool, int, int, int, int, int, int, float);

    virtual ProfileCall GetImpl() = 0;

    void Run(const TestMatrixSizes& lengths = DefaultTestMatrixSizes)
    {
        bool all_success = true;

        for(auto length : lengths)
        {
            int M = length[0];
            int N = length[1];
            int K = length[2];

            // Leading dimensions follow the usual row-/column-major convention.
            int StrideA = ck::is_same_v<ALayout, Row> ? K : M;
            int StrideB = ck::is_same_v<BLayout, Row> ? N : K;
            int StrideE = ck::is_same_v<ELayout, Row> ? N : M;

            float requant_scale = 0.03f;

            // Bitwise & (not &&) so that every size is run even after a failure.
            all_success =
                all_success &
                GetImpl()(1, 1, false, true, M, N, K, StrideA, StrideB, StrideE, requant_scale);
        }

        EXPECT_TRUE(all_success);
    }
};

} // namespace test
} // namespace ck