Added wmma support for gemm quantization: (#2841)

- profiler for gemm quantization for DL/XDL
- tests for gemm quantization for DL/XDL
- implementation for gemm quantization for WMMA
- profiler/tests for gemm quantization for WMMA

Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
This commit is contained in:
Wojciech Laskowski
2025-09-17 01:23:29 +02:00
committed by GitHub
parent 2723dbd332
commit f97b2a3f5d
21 changed files with 1167 additions and 8 deletions

View File

@@ -0,0 +1,41 @@
#pragma once
TYPED_TEST(TestGemmQuantization, SmallM)
{
std::vector<int> Ms{1, 2, 3, 4, 5, 6};
constexpr int N = 512;
constexpr int K = 320;
for(int M : Ms)
this->Run({{M, N, K}});
}
TYPED_TEST(TestGemmQuantization, MidLargeM)
{
std::vector<int> Ms{127, 255, 312, 799, 1573};
constexpr int N = 1024;
constexpr int K = 320;
for(int M : Ms)
this->Run({{M, N, K}});
}
TYPED_TEST(TestGemmQuantization, MNKPadded)
{
const std::vector<int> Ms{127, 150, 188, 210};
constexpr int N = 136;
constexpr int K = 280;
for(int M : Ms)
this->Run({{M, N, K}});
}
TYPED_TEST(TestGemmQuantization, Regular)
{
constexpr int M = 512;
constexpr int N = 512;
std::vector<int> Ks{512};
for(int K : Ks)
this->Run({{M, N, K}});
}