Files
composable_kernel/tile_engine/ops/gemm/benchmark_gemm.cpp
Casey-Shi 64b17847fa [Tile Engine] Add benchmark for tile engine gemm. (#2193)
* initial commit -m benchmark

* only support profile

* fix

* fix doc

* add default config

* add ci

* fix cmake

* tmp save for gen blobs

* fix bug

* merge

* range config

* test success

* fix

* fix

* move struct

* remove config property

* fix config

* remove comment

* add cmake option & modify

* add changelog

* fix

* format

* add pydantic module to the docker image

* fix

* add benchmark for cold and warmp up

* python format

* add asm cache control

* fix README

* remove pydantic module

* modify changelog

* fix config

* recover benchmark_gemm and fix

* format python

* refactor profiler

* fix csv bug

* fix codegen bug

* add kernel instance object

* add benchmark gemm executable

* fix jenkins & delete extra header

* disable warning output & enable default config

* Disable sparsity for invalid warp tile combinations

* fix gemm host template func

* refactor gemm profiler

* filter out some inmstances

* default config test & fix codegen bug

* add sparse flag to gen more instances

---------

Co-authored-by: illsilin <Illia.Silin@amd.com>
Co-authored-by: khuagarw <khuagarw@amd.com>
Co-authored-by: Thomas Ning <Thomas.Ning@amd.com>

[ROCm/composable_kernel commit: 128f5a1eab]
2025-05-26 22:32:36 -07:00

69 lines
2.1 KiB
C++

// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include <functional>
#include <tuple>
#include <exception>
#include "gemm_profiler.hpp"
#include "benchmark_gemm.hpp"
void benchmark_gemm(const ck_tile::ArgParser& arg_parser)
{
GemmProblem gemm_problem{arg_parser.get_int("split_k"),
arg_parser.get_int("m"),
arg_parser.get_int("n"),
arg_parser.get_int("k"),
arg_parser.get_int("stride_a"),
arg_parser.get_int("stride_b"),
arg_parser.get_int("stride_c"),
DataTypeTraits<ADataType>::name,
DataTypeTraits<BDataType>::name,
DataTypeTraits<AccDataType>::name,
DataTypeTraits<CDataType>::name,
ALayout::name,
BLayout::name,
CLayout::name,
arg_parser.get_bool("structured_sparsity")};
Setting setting{
arg_parser.get_int("warmup"),
arg_parser.get_int("repeat"),
arg_parser.get_bool("timer"),
arg_parser.get_int("verify"),
arg_parser.get_int("init"),
arg_parser.get_bool("log"),
arg_parser.get_str("csv_filename"),
};
auto& profiler = GemmProfiler::instance(setting);
try
{
auto kernel_func = get_kernel_func_by_trait(arg_parser);
profiler.benchmark(gemm_problem, kernel_func);
profiler.select_best_instance(static_cast<Metric>(arg_parser.get_int("metric")));
}
catch(const std::exception& e)
{
std::cerr << "Benchmark failed: " << e.what() << std::endl;
}
}
int main(int argc, char* argv[])
{
try
{
auto [result, parser] = create_args(argc, argv);
if(!result)
return EXIT_FAILURE;
benchmark_gemm(parser);
return 0;
}
catch(const std::exception& e)
{
std::cerr << "Error: " << e.what() << "\n";
return EXIT_FAILURE;
}
}