mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 01:10:17 +00:00
* Add DeviceBatchedGemmMultipleD_Dl
* Fix batched_gemm tests
* Fix comments
* test_batched_gemm_multi_d fixes
* Fix args for isSupported batchedGemmMultipleDDl
* Disable tests for gfx90a
115 lines
6.7 KiB
C++
115 lines
6.7 KiB
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#include <iostream>
|
|
|
|
#include "profiler/profile_batched_gemm_impl.hpp"
|
|
|
|
#include "ck/library/tensor_operation_instance/gpu/batched_gemm.hpp"
|
|
|
|
namespace {
|
|
using ADataType = ck::bhalf_t;
|
|
using BDataType = ck::bhalf_t;
|
|
using CDataType = ck::bhalf_t;
|
|
|
|
using Row = ck::tensor_layout::gemm::RowMajor;
|
|
using Col = ck::tensor_layout::gemm::ColumnMajor;
|
|
|
|
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
|
} // namespace
|
|
|
|
int main()
|
|
{
|
|
int M = 256;
|
|
int N = 256;
|
|
int K = 128;
|
|
int BatchCount = 3;
|
|
|
|
bool pass = true;
|
|
|
|
using namespace ck::tensor_operation::device;
|
|
|
|
pass = pass && ck::profiler::profile_batched_gemm_impl<ADataType,
|
|
BDataType,
|
|
CDataType,
|
|
Row,
|
|
Row,
|
|
Row,
|
|
PassThrough,
|
|
PassThrough,
|
|
PassThrough,
|
|
DeviceBatchedGemm<Row,
|
|
Row,
|
|
Row,
|
|
ADataType,
|
|
BDataType,
|
|
CDataType,
|
|
PassThrough,
|
|
PassThrough,
|
|
PassThrough>>(
|
|
true, 1, false, 1, M, N, K, K, N, N, M * K, K * N, M * N, BatchCount);
|
|
|
|
pass = pass && ck::profiler::profile_batched_gemm_impl<ADataType,
|
|
BDataType,
|
|
CDataType,
|
|
Row,
|
|
Col,
|
|
Row,
|
|
PassThrough,
|
|
PassThrough,
|
|
PassThrough,
|
|
DeviceBatchedGemm<Row,
|
|
Col,
|
|
Row,
|
|
ADataType,
|
|
BDataType,
|
|
CDataType,
|
|
PassThrough,
|
|
PassThrough,
|
|
PassThrough>>(
|
|
true, 1, false, 1, M, N, K, K, K, N, M * K, K * N, M * N, BatchCount);
|
|
|
|
pass = pass && ck::profiler::profile_batched_gemm_impl<ADataType,
|
|
BDataType,
|
|
CDataType,
|
|
Col,
|
|
Row,
|
|
Row,
|
|
PassThrough,
|
|
PassThrough,
|
|
PassThrough,
|
|
DeviceBatchedGemm<Col,
|
|
Row,
|
|
Row,
|
|
ADataType,
|
|
BDataType,
|
|
CDataType,
|
|
PassThrough,
|
|
PassThrough,
|
|
PassThrough>>(
|
|
true, 1, false, 1, M, N, K, M, N, N, M * K, K * N, M * N, BatchCount);
|
|
|
|
pass = pass && ck::profiler::profile_batched_gemm_impl<ADataType,
|
|
BDataType,
|
|
CDataType,
|
|
Col,
|
|
Col,
|
|
Row,
|
|
PassThrough,
|
|
PassThrough,
|
|
PassThrough,
|
|
DeviceBatchedGemm<Col,
|
|
Col,
|
|
Row,
|
|
ADataType,
|
|
BDataType,
|
|
CDataType,
|
|
PassThrough,
|
|
PassThrough,
|
|
PassThrough>>(
|
|
true, 1, false, 1, M, N, K, M, K, N, M * K, K * N, M * N, BatchCount);
|
|
|
|
std::cout << "test BatchedGEMM bf16: " << (pass ? "Pass" : "Fail") << std::endl;
|
|
return pass ? 0 : 1;
|
|
}
|