Files
composable_kernel/example/ck_tile/03_gemm/gemm_basic.cpp
yinglu d460ab35b6 [rocm-libraries] ROCm/rocm-libraries#4302 (commit e62bd8a)
[CK_TILE] add tf32 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Proposed changes

TF32 is added in CK on gfx942 and gfx950. This PR is to initiate tf32 in
CK_TILE on gfx942 and gfx950.

## Checklist

Please put an into the boxes that apply. You can also fill these out
after creating the PR. If you're not sure, please don't hesitate to ask.

- [ ] I have added tests relevant to the introduced functionality, and
the unit tests are passing locally
- [ ] I have added the test to REGRESSION_TESTS list defined at the top
of CMakeLists.txt in tests/CMakeLists.txt, **IF** the test takes more
than 30 seconds to run.
- [ ] I have added inline documentation which enables the maintainers
with understanding the motivation
- [ ] I have removed the stale documentation which is no longer relevant
after this pull request
- [ ] (If this change is user-facing) I have added release notes which
provide the end users with a brief summary of the improvement from this
pull request
- [x] I have run  on all changed files
- [ ] Any dependent changes have been merged

## Discussion
2026-03-19 09:19:06 +00:00

119 lines
4.1 KiB
C++

// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include "gemm_utils.hpp"
#include "run_gemm_example.inc"
#include "run_gemm_example_common.hpp"
#include "gemm_basic_invoker.hpp"
#include "ck_tile/core/utility/gemm_validation.hpp"
int run_gemm_example(ck_tile::ArgParser& arg_parser)
{
std::string data_type = arg_parser.get_str("prec");
std::string a_layout = arg_parser.get_str("a_layout");
std::string b_layout = arg_parser.get_str("b_layout");
std::string c_layout = arg_parser.get_str("c_layout");
std::tuple<ck_tile::index_t, ck_tile::index_t, ck_tile::index_t> gemm_sizes =
parse_gemm_size(arg_parser);
int m = std::get<0>(gemm_sizes);
int n = std::get<1>(gemm_sizes);
int k = std::get<2>(gemm_sizes);
int stride_a = arg_parser.get_int("stride_a");
int stride_b = arg_parser.get_int("stride_b");
int stride_c = arg_parser.get_int("stride_c");
using GemmConfig = GemmConfigBase;
using Invoker = BasicInvoker;
ck_tile::validate_gemm_stride(
a_layout, b_layout, c_layout, m, n, k, stride_a, stride_b, stride_c);
if(data_type == "fp16")
{
return run_gemm_example_prec_type<GemmConfig, Invoker, ck_tile::half_t>(
a_layout, b_layout, arg_parser);
}
else if(data_type == "bf16")
{
return run_gemm_example_prec_type<GemmConfig, Invoker, ck_tile::bf16_t>(
a_layout, b_layout, arg_parser);
}
#ifdef CK_GFX950_SUPPORT
else if(data_type == "tf32")
{
// Pass tf32_t as A/B types - epilogue auto-detects and maps to float for data operations
return run_gemm_example_prec_type<GemmConfig,
Invoker,
ck_tile::tf32_t,
ck_tile::tf32_t,
float>(a_layout, b_layout, arg_parser);
}
#endif
else if(data_type == "fp8")
{
return run_gemm_example_prec_type<GemmConfig,
Invoker,
ck_tile::fp8_t,
ck_tile::fp8_t,
ck_tile::half_t>(a_layout, b_layout, arg_parser);
}
else if(data_type == "bf8")
{
return run_gemm_example_prec_type<GemmConfig,
Invoker,
ck_tile::bf8_t,
ck_tile::bf8_t,
ck_tile::half_t>(a_layout, b_layout, arg_parser);
}
else if(data_type == "i8")
{
return run_gemm_example_prec_type<GemmConfig,
Invoker,
ck_tile::int8_t,
ck_tile::int8_t,
int32_t>(a_layout, b_layout, arg_parser);
}
else if(data_type == "pk_int4_t")
{
// TODO: Add support for bhalf_t ADataType
if constexpr(GemmConfig::Pipeline == ck_tile::GemmPipeline::COMPUTE_V3)
{
return run_gemm_example_prec_type<GemmConfig,
Invoker,
ck_tile::half_t,
ck_tile::pk_int4_t,
ck_tile::half_t>(a_layout, b_layout, arg_parser);
}
else
{
throw std::runtime_error("Unsupported data type for this operation !!!");
}
}
else
{
throw std::runtime_error("Unsupported data type for this operation !!!");
}
}
int main(int argc, char* argv[])
{
auto arg_parser = create_args();
auto result = arg_parser.parse(argc, argv);
if(!result)
return -1;
try
{
return !run_gemm_example(arg_parser);
}
catch(const std::runtime_error& e)
{
std::cerr << "Runtime error: " << e.what() << '\n';
return EXIT_FAILURE;
}
}