mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-01 12:11:19 +00:00
* Fix a typo * Use std::variant to call run_gemm_example_with_layouts with the available layout variant combinations * Use a unified run_gemm_example_prec_type for basic gemm and universal gemm * Factor out run_gemm_example_prec_type * Refactor argument parsing in gemm_splitk_two_stage_reduce.cpp * Parse arguments outside of create_args * Move the gemm operators to separate structs to facilitate their reuse * Move the invokers to separate files to facilitate their reuse * Rename the invoker files for consistency with the examples that use them * Add fp32 support to the elementwise examples, and produce an error message for unsupported types * Get rid of four unused variables * Make two variables const * Add support for different input-output type combinations in elementwise examples * Test support for different input and output types in elementwise examples * Add support for different operations in the elementwise unary tests * Add support for UnaryConvert in the elementwise unary tests * Add support for bf16 in elementwise examples, excluding unsupported type combinations * Make some operator parameters const in ElementWiseKernel * Remove some unnecessary include statements * Implement a two-stage GEMM that does a type conversion in the second stage using the elementwise kernel * Clear workspace instead of output when flushing the cache in SplitKTwoStageInvoker::gemm * Fix formatting issues reported by clang * Add back CK_TILE_USE_WMMA related changes * Use the right prec type for bf16 in the universal GEMM and two stage split K examples * Add some brackets * Add some brackets * Separate the clearing of the GEMM output memory from the cache flushing in the universal GEMM example * Separate the clearing of the GEMM output memory from the cache flushing in the split K two stage example * Fix formatting * No need to call SetZero on ws_m_n_dev_buf here, as clear_gemm_output now does this as part of the kernel preprocessing * Add fp16 data type to splitk two stage 
example * Add preprocessing with optional cache flushing and clearing of output for k_batch > 1 to the basic GEMM example
92 lines
3.0 KiB
C++
92 lines
3.0 KiB
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#include "gemm_utils.hpp"
|
|
#include "run_gemm_example.inc"
|
|
#include "run_gemm_example_common.hpp"
|
|
#include "gemm_basic_invoker.hpp"
|
|
|
|
/// Dispatches the basic GEMM example on the precision requested on the command line.
///
/// Reads the "prec", "a_layout" and "b_layout" options from @p arg_parser and forwards the
/// two layout strings plus the parser to run_gemm_example_prec_type, instantiated with
/// GemmConfigBase, BasicInvoker and the element types chosen by the precision string.
///
/// @param arg_parser Parser that is queried for the options listed above.
/// @return Whatever the selected run_gemm_example_prec_type instantiation returns.
/// @throws std::runtime_error if the precision string is not one handled here, or if it is
///         "pk_int4_t" while GemmConfig::Pipeline is not CK_TILE_PIPELINE_COMPUTE_V3.
int run_gemm_example(ck_tile::ArgParser& arg_parser)
{
    using GemmConfig = GemmConfigBase;
    using Invoker    = BasicInvoker;

    std::string precision = arg_parser.get_str("prec");
    std::string layout_a  = arg_parser.get_str("a_layout");
    std::string layout_b  = arg_parser.get_str("b_layout");

    // 16-bit floating point inputs: only the first element type is spelled out.
    if(precision == "fp16")
    {
        return run_gemm_example_prec_type<GemmConfig, Invoker, ck_tile::half_t>(
            layout_a, layout_b, arg_parser);
    }
    if(precision == "bf16")
    {
        return run_gemm_example_prec_type<GemmConfig, Invoker, ck_tile::bf16_t>(
            layout_a, layout_b, arg_parser);
    }

    // 8-bit floating point inputs, with ck_tile::half_t as the third element type.
    if(precision == "fp8")
    {
        return run_gemm_example_prec_type<GemmConfig,
                                          Invoker,
                                          ck_tile::fp8_t,
                                          ck_tile::fp8_t,
                                          ck_tile::half_t>(layout_a, layout_b, arg_parser);
    }
    if(precision == "bf8")
    {
        return run_gemm_example_prec_type<GemmConfig,
                                          Invoker,
                                          ck_tile::bf8_t,
                                          ck_tile::bf8_t,
                                          ck_tile::half_t>(layout_a, layout_b, arg_parser);
    }

    // 8-bit integer inputs, with int32_t as the third element type.
    if(precision == "i8")
    {
        return run_gemm_example_prec_type<GemmConfig,
                                          Invoker,
                                          ck_tile::int8_t,
                                          ck_tile::int8_t,
                                          int32_t>(layout_a, layout_b, arg_parser);
    }

    if(precision == "pk_int4_t")
    {
        // TODO: Add support for bhalf_t ADataType
        // The packed int4 path is only taken when the configured pipeline is COMPUTE_V3;
        // otherwise control falls through to the shared "unsupported" error below.
        if constexpr(GemmConfig::Pipeline == CK_TILE_PIPELINE_COMPUTE_V3)
        {
            return run_gemm_example_prec_type<GemmConfig,
                                              Invoker,
                                              ck_tile::half_t,
                                              ck_tile::pk_int4_t,
                                              ck_tile::half_t>(layout_a, layout_b, arg_parser);
        }
    }

    // Reached for an unknown precision string or an unsupported pk_int4_t configuration.
    throw std::runtime_error("Unsupported data type for this operation !!!");
}
|
|
|
|
int main(int argc, char* argv[])
|
|
{
|
|
auto arg_parser = create_args();
|
|
auto result = arg_parser.parse(argc, argv);
|
|
|
|
if(!result)
|
|
return -1;
|
|
|
|
try
|
|
{
|
|
return !run_gemm_example(arg_parser);
|
|
}
|
|
catch(const std::runtime_error& e)
|
|
{
|
|
std::cerr << "Runtime error: " << e.what() << '\n';
|
|
return EXIT_FAILURE;
|
|
}
|
|
}
|