Added wmma support for gemm quantization: (#2841)

- profiler for gemm quantization for DL/XDL - tests for gemm quantization for DL/XDL - implementation for gemm quantization for WMMA - profiler/tests for gemm quantization for WMMA Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
2026-05-04 13:41:24 +00:00 · 2025-09-17 01:23:29 +02:00
parent 2723dbd332
commit f97b2a3f5d
21 changed files with 1167 additions and 8 deletions
--- a/test/quantization/gemm/test_gemm_quantization_ut_cases.inc
+++ b/test/quantization/gemm/test_gemm_quantization_ut_cases.inc
@@ -0,0 +1,41 @@
+#pragma once
+
+TYPED_TEST(TestGemmQuantization, SmallM)
+{
+    // One Run call per M in 1..6, with N held at 512 and K held at 320.
+    constexpr int kN = 512;
+    constexpr int kK = 320;
+    const std::vector<int> m_values{1, 2, 3, 4, 5, 6};
+    for(const int m : m_values)
+        this->Run({{m, kN, kK}});
+}
+
+TYPED_TEST(TestGemmQuantization, MidLargeM)
+{
+    // One Run call per listed M (127 up to 1573), with N = 1024 and K = 320.
+    constexpr int kN = 1024;
+    constexpr int kK = 320;
+    const std::vector<int> m_values{127, 255, 312, 799, 1573};
+    for(const int m : m_values)
+        this->Run({{m, kN, kK}});
+}
+
+TYPED_TEST(TestGemmQuantization, MNKPadded)
+{
+    // N = 136, K = 280, four M values; presumably chosen to need padding - TODO confirm.
+    constexpr int kN = 136;
+    constexpr int kK = 280;
+    const std::vector<int> m_values{127, 150, 188, 210};
+    for(const int m : m_values)
+        this->Run({{m, kN, kK}});
+}
+
+TYPED_TEST(TestGemmQuantization, Regular)
+{
+    // M and N are fixed at 512; K iterates over a list that currently holds only 512.
+    constexpr int kM = 512;
+    constexpr int kN = 512;
+    const std::vector<int> k_values{512};
+    for(const int k : k_values)
+        this->Run({{kM, kN, k}});
+}