[CK_TILE] Fix Batched GEMM Example GPU verification (#2800)

Added more batched GEMM test cases
This commit is contained in:
aledudek
2025-09-09 09:30:57 +02:00
committed by GitHub
parent 75570d0fa8
commit e82ccbdaf7
4 changed files with 79 additions and 37 deletions

View File

@@ -1,5 +1,5 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple>
@@ -16,11 +16,11 @@ using Col = ck_tile::tensor_layout::gemm::ColumnMajor;
// clang-format off
// Type list driving the typed batched-GEMM test suite. Each tuple is one
// instantiation: (ALayout, BLayout, CLayout, ADataType, BDataType, AccDataType, CDataType).
// All four A/B layout combinations are enabled; C is Row in every case.
using KernelTypes = ::testing::Types<
    //         ALayout, BLayout, CLayout, ADataType, BDataType, AccDataType, CDataType
    std::tuple<    Row,     Row,     Row,       F16,       F16,         F32,       F16>,
    std::tuple<    Col,     Row,     Row,       F16,       F16,         F32,       F16>,
    std::tuple<    Row,     Col,     Row,       F16,       F16,         F32,       F16>,
    std::tuple<    Col,     Col,     Row,       F16,       F16,         F32,       F16>
>;
// clang-format on

View File

@@ -1,9 +1,74 @@
#pragma once
/// Problem size for one batched-GEMM test case.
///
/// Remains an aggregate, so `{M, N, K, batchCount}` brace-initialization keeps
/// working; the default member initializers only guarantee that a
/// default-constructed object never carries indeterminate values.
struct GemmParams
{
    int M          = 0; ///< First GEMM dimension (presumably rows of A and C - confirm against Run).
    int N          = 0; ///< Second GEMM dimension (presumably columns of B and C - confirm against Run).
    int K          = 0; ///< Third GEMM dimension (presumably the reduction length - confirm against Run).
    int batchCount = 0; ///< Number of batches forwarded to the test fixture.
};
/// Stride set handed to the test fixture for one run.
///
/// Remains an aggregate, so positional brace-initialization in the order
/// `{strideA, strideB, strideC, batchStrideA, batchStrideB, batchStrideC}`
/// keeps working; the default member initializers only remove indeterminate
/// values from default-constructed objects.
struct StrideConfig
{
    int strideA      = 0; ///< Stride of the A tensor (presumably the leading dimension - confirm against Run).
    int strideB      = 0; ///< Stride of the B tensor (presumably the leading dimension - confirm against Run).
    int strideC      = 0; ///< Stride of the C tensor (presumably the leading dimension - confirm against Run).
    int batchStrideA = 0; ///< Batch stride of A; the Basic test sets it to M * K.
    int batchStrideB = 0; ///< Batch stride of B; the Basic test sets it to K * N.
    int batchStrideC = 0; ///< Batch stride of C; the Basic test sets it to M * N.
};
// Runs the batched-GEMM fixture over a table of problem sizes and, for each
// size, over four stride configurations.
TYPED_TEST(TestCkTileBatchedGemm, Basic)
{
    // Each entry is {M, N, K, batchCount}. M and N stay fixed at 256 while K
    // and the batch count vary.
    const std::vector<GemmParams> gemmParams{{256, 256, 256, 1},
                                             {256, 256, 256, 2},
                                             {256, 256, 512, 2},
                                             {256, 256, 128, 2},
                                             {256, 256, 64, 2},
                                             {256, 256, 64, 3},
                                             {256, 256, 64, 4},
                                             {256, 256, 64, 8},
                                             {256, 256, 64, 16}};

    for(const auto& params : gemmParams)
    {
        // The batch strides are identical in every stride configuration, so
        // they are computed once per problem size.
        const int batchStrideA = params.M * params.K;
        const int batchStrideB = params.K * params.N;
        const int batchStrideC = params.M * params.N;

        // Each entry is {strideA, strideB, strideC, batchStrideA, batchStrideB,
        // batchStrideC}. strideA is K or M and strideB is N or K; strideC is
        // always N.
        // NOTE(review): the four entries presumably correspond to the
        // row/column-major combinations of A and B - confirm against Run.
        // Every configuration is run for every typed instantiation.
        const std::vector<StrideConfig> strideConfigs{
            {params.K, params.N, params.N, batchStrideA, batchStrideB, batchStrideC},
            {params.K, params.K, params.N, batchStrideA, batchStrideB, batchStrideC},
            {params.M, params.N, params.N, batchStrideA, batchStrideB, batchStrideC},
            {params.M, params.K, params.N, batchStrideA, batchStrideB, batchStrideC}};

        for(const auto& conf : strideConfigs)
        {
            this->Run(params.M,
                      params.N,
                      params.K,
                      conf.strideA,
                      conf.strideB,
                      conf.strideC,
                      conf.batchStrideA,
                      conf.batchStrideB,
                      conf.batchStrideC,
                      params.batchCount);
        }
    }
}