* Remove some duplicate code in fmha_fwd_appendkv_kernel.hpp

* Simplify two templated operator calls by having the templated types deduced automatically

* Simplify two GemmPipeline calls

* Fix GemmPipelineAgBgCrCompV4::GetName

* Refactor use of ArgParser in CK tile GEMM examples

* Update args in README.md to match the implementation in create_args

* Remove some unnecessary include statements

* Rename two variables

* Factor out common code

* Factor out do_verify

* Add and use type aliases for memory operation integral constants

* In gemm_basic.cpp, use kPadM, kPadN, kPadK, and kBlockPerCu from GemmConfig

---------

Co-authored-by: Adam Osewski <19374865+aosewski@users.noreply.github.com>
This commit is contained in:
SamiAario-AMD
2025-08-13 11:12:08 +03:00
committed by GitHub
parent 05a6e92705
commit 28a97865f5
9 changed files with 142 additions and 178 deletions

View File

@@ -18,7 +18,6 @@ This will result in an executable `build/bin/tile_example_gemm_basic` & `build/b
## example
```
args:
-b batch size (default:1)
-m m dimension (default:1024)
-n n dimension (default:2048)
-k k dimension (default:64)
@@ -29,9 +28,11 @@ args:
-stride_b Tensor B stride (default:0)
-stride_c Tensor C stride (default:0)
-v 0. No validation, 1. Validation on CPU, 2. Validation on GPU (default:2)
-e Absolute error tolerance (default:1e-5)
-prec data type. fp16/bf16/fp8/bf8/int8 (default:fp16)
-warmup number of iterations before benchmark the kernel (default:10)
-repeat number of iterations to benchmark the kernel (default:100)
-timer gpu:gpu timer, cpu:cpu timer (default:gpu)
-split_k splitK value (default:1)
-init 0:random, 1:linear, 2:constant (default:1)
-persistent 0:non-persistent, 1:persistent (default:0)
```

View File

@@ -1,15 +1,6 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
#include <hip/hip_runtime.h>
#include <cstring>
#include <iostream>
#include <ostream>
#include <string>
#include <tuple>
#include "ck_tile/host.hpp"
#include "gemm_utils.hpp"
template <typename GemmConfig,
@@ -29,12 +20,6 @@ float gemm(const ck_tile::GemmHostArgs& args, const ck_tile::stream_config& s)
{
if constexpr(Persistent)
std::cout << "WARNING: Ignoring persistent kernel option for basic gemm." << std::endl;
// The kPadM, kPadN, kPadK & kBlockPerCu should also come from the Codegen part.
constexpr bool kPadM = false;
constexpr bool kPadN = false;
constexpr bool kPadK = false;
constexpr int kBlockPerCu = 1;
// This part comes from the Codegen
constexpr ck_tile::index_t M_Tile = 256;
@@ -56,8 +41,12 @@ float gemm(const ck_tile::GemmHostArgs& args, const ck_tile::stream_config& s)
using TilePartitioner = ck_tile::GemmTile1DPartitioner<CodegenGemmShape>;
using CodegenGemmTraits =
ck_tile::TileGemmTraits<kPadM, kPadN, kPadK, ALayout, BLayout, CLayout>;
using CodegenGemmTraits = ck_tile::TileGemmTraits<GemmConfig::kPadM,
GemmConfig::kPadN,
GemmConfig::kPadK,
ALayout,
BLayout,
CLayout>;
using CodegenPipelineProblem = ck_tile::
GemmPipelineProblem<ADataType, BDataType, AccDataType, CodegenGemmShape, CodegenGemmTraits>;
@@ -111,28 +100,30 @@ float gemm(const ck_tile::GemmHostArgs& args, const ck_tile::stream_config& s)
<< std::endl;
}
float ave_time = ck_tile::launch_kernel(
s, ck_tile::make_kernel<blocks.x, kBlockPerCu>(Kernel{}, grids, blocks, 0, kargs));
float ave_time =
ck_tile::launch_kernel(s,
ck_tile::make_kernel<blocks.x, GemmConfig::kBlockPerCu>(
Kernel{}, grids, blocks, 0, kargs));
return ave_time;
};
if(args.k_batch == 1)
{
return Run(ck_tile::integral_constant<ck_tile::memory_operation_enum,
ck_tile::memory_operation_enum::set>{});
return Run(MemoryOpSet{});
}
else
{
return Run(ck_tile::integral_constant<ck_tile::memory_operation_enum,
ck_tile::memory_operation_enum::atomic_add>{});
return Run(MemoryOpAtomicAdd{});
}
}
#include "run_gemm_example.inc"
template <typename APrecType, typename BPrecType = APrecType, typename CPrecType = APrecType>
int run_gemm_example_prec_type(std::string a_layout, std::string b_layout, int argc, char* argv[])
int run_gemm_example_prec_type(std::string a_layout,
std::string b_layout,
ck_tile::ArgParser& arg_parser)
{
using Row = ck_tile::tensor_layout::gemm::RowMajor;
using Col = ck_tile::tensor_layout::gemm::ColumnMajor;
@@ -142,12 +133,12 @@ int run_gemm_example_prec_type(std::string a_layout, std::string b_layout, int a
if(a_layout == "R" && b_layout == "C")
{
return run_gemm_example_with_layouts<GemmConfigBase, APrecType, BPrecType, CPrecType>(
argc, argv, Row{}, Col{}, Row{});
arg_parser, Row{}, Col{}, Row{});
}
else if(a_layout == "C" && b_layout == "C")
{
return run_gemm_example_with_layouts<GemmConfigBase, APrecType, BPrecType, CPrecType>(
argc, argv, Col{}, Col{}, Row{});
arg_parser, Col{}, Col{}, Row{});
}
else
{
@@ -160,22 +151,22 @@ int run_gemm_example_prec_type(std::string a_layout, std::string b_layout, int a
if(a_layout == "R" && b_layout == "C")
{
return run_gemm_example_with_layouts<GemmConfigBase, APrecType, BPrecType, CPrecType>(
argc, argv, Row{}, Col{}, Row{});
arg_parser, Row{}, Col{}, Row{});
}
else if(a_layout == "R" && b_layout == "R")
{
return run_gemm_example_with_layouts<GemmConfigBase, APrecType, BPrecType, CPrecType>(
argc, argv, Row{}, Row{}, Row{});
arg_parser, Row{}, Row{}, Row{});
}
else if(a_layout == "C" && b_layout == "R")
{
return run_gemm_example_with_layouts<GemmConfigBase, APrecType, BPrecType, CPrecType>(
argc, argv, Col{}, Row{}, Row{});
arg_parser, Col{}, Row{}, Row{});
}
else if(a_layout == "C" && b_layout == "C")
{
return run_gemm_example_with_layouts<GemmConfigBase, APrecType, BPrecType, CPrecType>(
argc, argv, Col{}, Col{}, Row{});
arg_parser, Col{}, Col{}, Row{});
}
else
{
@@ -184,38 +175,34 @@ int run_gemm_example_prec_type(std::string a_layout, std::string b_layout, int a
}
}
int run_gemm_example(int argc, char* argv[])
int run_gemm_example(ck_tile::ArgParser& arg_parser)
{
auto [result, arg_parser] = create_args(argc, argv);
if(!result)
return -1;
std::string data_type = arg_parser.get_str("prec");
std::string a_layout = arg_parser.get_str("a_layout");
std::string b_layout = arg_parser.get_str("b_layout");
if(data_type == "fp16")
{
return run_gemm_example_prec_type<ck_tile::half_t>(a_layout, b_layout, argc, argv);
return run_gemm_example_prec_type<ck_tile::half_t>(a_layout, b_layout, arg_parser);
}
else if(data_type == "bf16")
{
return run_gemm_example_prec_type<ck_tile::bf16_t>(a_layout, b_layout, argc, argv);
return run_gemm_example_prec_type<ck_tile::bf16_t>(a_layout, b_layout, arg_parser);
}
else if(data_type == "fp8")
{
return run_gemm_example_prec_type<ck_tile::fp8_t, ck_tile::fp8_t, ck_tile::half_t>(
a_layout, b_layout, argc, argv);
a_layout, b_layout, arg_parser);
}
else if(data_type == "bf8")
{
return run_gemm_example_prec_type<ck_tile::bf8_t, ck_tile::bf8_t, ck_tile::half_t>(
a_layout, b_layout, argc, argv);
a_layout, b_layout, arg_parser);
}
else if(data_type == "i8")
{
return run_gemm_example_prec_type<ck_tile::int8_t, ck_tile::int8_t, int32_t>(
a_layout, b_layout, argc, argv);
a_layout, b_layout, arg_parser);
}
else if(data_type == "pk_int4_t")
{
@@ -223,7 +210,7 @@ int run_gemm_example(int argc, char* argv[])
if constexpr(GemmConfigBase::Pipeline == CK_TILE_PIPELINE_COMPUTE_V3)
{
return run_gemm_example_prec_type<ck_tile::half_t, ck_tile::pk_int4_t, ck_tile::half_t>(
a_layout, b_layout, argc, argv);
a_layout, b_layout, arg_parser);
}
else
{
@@ -238,9 +225,13 @@ int run_gemm_example(int argc, char* argv[])
int main(int argc, char* argv[])
{
auto [result, arg_parser] = create_args(argc, argv);
if(!result)
return -1;
try
{
return !run_gemm_example(argc, argv);
return !run_gemm_example(arg_parser);
}
catch(const std::runtime_error& e)
{

View File

@@ -476,6 +476,12 @@ auto create_args(int argc, char* argv[])
return std::make_tuple(result, arg_parser);
}
// Type aliases for memory operation integral constants.
// Each alias names an empty tag type whose static `value` member is one enumerator of
// ck_tile::memory_operation_enum. The examples pass a default-constructed instance
// (e.g. `MemoryOpSet{}`) as a function argument so the callee can pick the memory
// operation from the argument's type.
// NOTE(review): the call sites these aliases replace spelled the type as
// ck_tile::integral_constant, whereas the aliases use std::integral_constant -- confirm the
// two are interchangeable for every callee that receives them.
// MemoryOpSet: tag carrying memory_operation_enum::set.
using MemoryOpSet =
std::integral_constant<ck_tile::memory_operation_enum, ck_tile::memory_operation_enum::set>;
// MemoryOpAtomicAdd: tag carrying memory_operation_enum::atomic_add.
using MemoryOpAtomicAdd = std::integral_constant<ck_tile::memory_operation_enum,
ck_tile::memory_operation_enum::atomic_add>;
// host API
template <typename ADataType,
typename BDataType,

View File

@@ -210,12 +210,13 @@ template <typename GemmConfig,
typename APrecType,
typename BPrecType = APrecType,
typename CPrecType = APrecType>
int run_gemm_example_prec_type(std::string a_layout, std::string b_layout, int argc, char* argv[])
int run_gemm_example_prec_type(std::string a_layout,
std::string b_layout,
ck_tile::ArgParser& arg_parser)
{
using Row = ck_tile::tensor_layout::gemm::RowMajor;
using Col = ck_tile::tensor_layout::gemm::ColumnMajor;
auto [result, arg_parser] = create_args(argc, argv);
bool preshuffle = GemmConfig::Preshuffle;
using Row = ck_tile::tensor_layout::gemm::RowMajor;
using Col = ck_tile::tensor_layout::gemm::ColumnMajor;
bool preshuffle = GemmConfig::Preshuffle;
if(preshuffle && (a_layout != "R" || b_layout != "C"))
{
@@ -226,7 +227,7 @@ int run_gemm_example_prec_type(std::string a_layout, std::string b_layout, int a
if(a_layout == "R" && b_layout == "C")
{
return run_gemm_example_with_layouts<GemmConfig, APrecType, BPrecType, CPrecType>(
argc, argv, Row{}, Col{}, Row{});
arg_parser, Row{}, Col{}, Row{});
}
else
{
@@ -235,12 +236,8 @@ int run_gemm_example_prec_type(std::string a_layout, std::string b_layout, int a
}
template <template <typename PreType> typename GemmConfig>
int run_gemm_example(int argc, char* argv[])
int run_gemm_example(ck_tile::ArgParser& arg_parser)
{
auto [result, arg_parser] = create_args(argc, argv);
if(!result)
return -1;
std::string data_type = arg_parser.get_str("prec");
std::string a_layout = arg_parser.get_str("a_layout");
std::string b_layout = arg_parser.get_str("b_layout");
@@ -248,26 +245,26 @@ int run_gemm_example(int argc, char* argv[])
if(data_type == "fp16")
{
return run_gemm_example_prec_type<GemmConfig<ck_tile::half_t>, ck_tile::half_t>(
a_layout, b_layout, argc, argv);
a_layout, b_layout, arg_parser);
}
else if(data_type == "bf16")
{
return run_gemm_example_prec_type<GemmConfig<ck_tile::half_t>, ck_tile::bf16_t>(
a_layout, b_layout, argc, argv);
a_layout, b_layout, arg_parser);
}
else if(data_type == "fp8")
{
return run_gemm_example_prec_type<GemmConfig<ck_tile::fp8_t>,
ck_tile::fp8_t,
ck_tile::fp8_t,
ck_tile::half_t>(a_layout, b_layout, argc, argv);
ck_tile::half_t>(a_layout, b_layout, arg_parser);
}
else if(data_type == "bf8")
{
return run_gemm_example_prec_type<GemmConfig<ck_tile::bf8_t>,
ck_tile::bf8_t,
ck_tile::bf8_t,
ck_tile::half_t>(a_layout, b_layout, argc, argv);
ck_tile::half_t>(a_layout, b_layout, arg_parser);
}
else
{
@@ -277,9 +274,13 @@ int run_gemm_example(int argc, char* argv[])
int main(int argc, char* argv[])
{
auto [result, arg_parser] = create_args(argc, argv);
if(!result)
return -1;
try
{
return !run_gemm_example<GemmConfigPreshuffle_2>(argc, argv);
return !run_gemm_example<GemmConfigPreshuffle_2>(arg_parser);
}
catch(const std::runtime_error& e)
{

View File

@@ -272,6 +272,25 @@ auto shuffle_b(const ck_tile::HostTensor<T>& t)
return ck_tile::reference_permute(t_view, {0, 2, 3, 1, 4});
}
// Checks a computed result tensor against a reference tensor and reports the outcome on
// std::cout.
//
// c_m_n_dev_result : tensor holding the result under test.
// c_m_n_ref        : tensor holding the reference result.
// rtol_atol        : element 0 is printed as the relative error threshold and element 1 as
//                    the absolute error threshold; both are forwarded to ck_tile::check_err.
// variant          : label inserted into the printed verdict line.
//
// Returns whatever ck_tile::check_err returns for the two tensors.
template <typename CDataType>
bool do_verify(const ck_tile::HostTensor<CDataType>& c_m_n_dev_result,
               const ck_tile::HostTensor<CDataType>& c_m_n_ref,
               const ck_tile::tuple<double, double>& rtol_atol,
               const char* variant)
{
    // Read both tolerances once; the same values feed the comparison and the report.
    const auto rel_tol = rtol_atol.at(ck_tile::number<0>{});
    const auto abs_tol = rtol_atol.at(ck_tile::number<1>{});

    const bool pass = ck_tile::check_err(
        c_m_n_dev_result, c_m_n_ref, "Error: Incorrect results!", rel_tol, abs_tol);
    const char* const verdict = pass ? "correct" : "fail";

    std::cout << "Relative error threshold: " << rel_tol
              << " Absolute error threshold: " << abs_tol << std::endl;
    std::cout << "The " << variant << " verification result is:" << verdict << std::endl;

    return pass;
}
template <typename GemmConfig,
typename ADataType,
typename BDataType = ADataType,
@@ -279,16 +298,11 @@ template <typename GemmConfig,
typename ALayout,
typename BLayout,
typename CLayout>
int run_gemm_example_with_layouts(int argc,
char* argv[],
int run_gemm_example_with_layouts(ck_tile::ArgParser& arg_parser,
const ALayout a_layout = ALayout{},
const BLayout b_layout = BLayout{},
[[maybe_unused]] const CLayout c_layout = CLayout{})
{
auto [result, arg_parser] = create_args(argc, argv);
if(!result)
return -1;
using AccDataType = typename GemmTypeConfig<ADataType, BDataType, CDataType>::AccDataType;
ck_tile::index_t M = arg_parser.get_int("m");
@@ -430,28 +444,20 @@ int run_gemm_example_with_layouts(int argc,
c_m_n_dev_buf.FromDevice(c_m_n_dev_result.data());
bool pass = true;
// memory on host to store gpu reference result
ck_tile::HostTensor<CDataType> c_m_n_ref(
ck_tile::host_tensor_descriptor(M, N, stride_C, is_row_major(CLayout{})));
c_m_n_ref.SetZero();
if(arg_parser.get_int("v") == 1)
{
ck_tile::HostTensor<CDataType> c_m_n_host_ref(
ck_tile::host_tensor_descriptor(M, N, stride_C, is_row_major(CLayout{})));
c_m_n_host_ref.SetZero();
ck_tile::reference_gemm<ADataType, BDataType, AccDataType, CDataType>(
a_m_k, b_k_n, c_m_n_host_ref);
a_m_k, b_k_n, c_m_n_ref);
const float max_accumulated_value =
*std::max_element(c_m_n_host_ref.mData.begin(), c_m_n_host_ref.mData.end());
*std::max_element(c_m_n_ref.mData.begin(), c_m_n_ref.mData.end());
const auto rtol_atol = calculate_rtol_atol<ADataType, BDataType, AccDataType, CDataType>(
K, kbatch, max_accumulated_value);
pass = ck_tile::check_err(c_m_n_dev_result,
c_m_n_host_ref,
"Error: Incorrect results!",
rtol_atol.at(ck_tile::number<0>{}),
rtol_atol.at(ck_tile::number<1>{}));
std::cout << "Relative error threshold: " << rtol_atol.at(ck_tile::number<0>{})
<< " Absolute error threshold: " << rtol_atol.at(ck_tile::number<1>{})
<< std::endl;
std::cout << "The CPU verification result is:" << (pass ? "correct" : "fail") << std::endl;
pass = do_verify(c_m_n_dev_result, c_m_n_ref, rtol_atol, "CPU");
}
else if(arg_parser.get_int("v") == 2)
{
@@ -465,13 +471,8 @@ int run_gemm_example_with_layouts(int argc,
b_k_n_dev_buf.ToDevice(b_k_n.data());
}
// memory on host to store gpu reference result
ck_tile::HostTensor<CDataType> c_m_n_gpu_ref(
ck_tile::host_tensor_descriptor(M, N, stride_C, is_row_major(CLayout{})));
// memory on device to store gpu reference result
ck_tile::DeviceMem c_m_n_gpu_buf_ref(c_m_n_gpu_ref.get_element_space_size_in_bytes());
c_m_n_gpu_ref.SetZero();
ck_tile::DeviceMem c_m_n_gpu_buf_ref(c_m_n_ref.get_element_space_size_in_bytes());
c_m_n_gpu_buf_ref.SetZero();
ADataType* d_A = static_cast<ADataType*>(a_m_k_dev_buf.GetDeviceBuffer());
@@ -486,21 +487,13 @@ int run_gemm_example_with_layouts(int argc,
BLayout,
CLayout>(d_A, d_B, d_C, M, N, K, stride_A, stride_B, stride_C);
c_m_n_gpu_buf_ref.FromDevice(c_m_n_gpu_ref.data());
c_m_n_gpu_buf_ref.FromDevice(c_m_n_ref.data());
const float max_accumulated_value =
*std::max_element(c_m_n_gpu_ref.mData.begin(), c_m_n_gpu_ref.mData.end());
*std::max_element(c_m_n_ref.mData.begin(), c_m_n_ref.mData.end());
const auto rtol_atol = calculate_rtol_atol<ADataType, BDataType, AccDataType, CDataType>(
K, kbatch, max_accumulated_value);
pass = ck_tile::check_err(c_m_n_dev_result,
c_m_n_gpu_ref,
"Error: Incorrect results!",
rtol_atol.at(ck_tile::number<0>{}),
rtol_atol.at(ck_tile::number<1>{}));
std::cout << "Relative error threshold: " << rtol_atol.at(ck_tile::number<0>{})
<< " Absolute error threshold: " << rtol_atol.at(ck_tile::number<1>{})
<< std::endl;
std::cout << "The GPU verification result is: " << (pass ? "correct" : "fail") << std::endl;
pass = do_verify(c_m_n_dev_result, c_m_n_ref, rtol_atol, "GPU");
}
return pass;

View File

@@ -189,17 +189,11 @@ float gemm(const ck_tile::GemmHostArgs& args, const ck_tile::stream_config& s)
const auto RunSplitk = [&](const auto has_hot_loop_, const auto tail_number_) {
if(args.k_batch == 1)
{
Run(has_hot_loop_,
tail_number_,
ck_tile::integral_constant<ck_tile::memory_operation_enum,
ck_tile::memory_operation_enum::set>{});
Run(has_hot_loop_, tail_number_, MemoryOpSet{});
}
else
{
Run(has_hot_loop_,
tail_number_,
ck_tile::integral_constant<ck_tile::memory_operation_enum,
ck_tile::memory_operation_enum::atomic_add>{});
Run(has_hot_loop_, tail_number_, MemoryOpAtomicAdd{});
}
};
@@ -211,12 +205,13 @@ template <typename GemmConfig,
typename APrecType,
typename BPrecType = APrecType,
typename CPrecType = APrecType>
int run_gemm_example_prec_type(std::string a_layout, std::string b_layout, int argc, char* argv[])
int run_gemm_example_prec_type(std::string a_layout,
std::string b_layout,
ck_tile::ArgParser& arg_parser)
{
using Row = ck_tile::tensor_layout::gemm::RowMajor;
using Col = ck_tile::tensor_layout::gemm::ColumnMajor;
auto [result, arg_parser] = create_args(argc, argv);
bool preshuffle = GemmConfig::Preshuffle;
using Row = ck_tile::tensor_layout::gemm::RowMajor;
using Col = ck_tile::tensor_layout::gemm::ColumnMajor;
bool preshuffle = GemmConfig::Preshuffle;
if(preshuffle && std::is_same_v<BPrecType, ck_tile::pk_int4_t>)
{
@@ -234,12 +229,12 @@ int run_gemm_example_prec_type(std::string a_layout, std::string b_layout, int a
if(a_layout == "R" && b_layout == "C")
{
return run_gemm_example_with_layouts<GemmConfig, APrecType, BPrecType, CPrecType>(
argc, argv, Row{}, Col{}, Row{});
arg_parser, Row{}, Col{}, Row{});
}
else if(a_layout == "C" && b_layout == "C")
{
return run_gemm_example_with_layouts<GemmConfig, APrecType, BPrecType, CPrecType>(
argc, argv, Col{}, Col{}, Row{});
arg_parser, Col{}, Col{}, Row{});
}
else
{
@@ -252,22 +247,22 @@ int run_gemm_example_prec_type(std::string a_layout, std::string b_layout, int a
if(a_layout == "R" && b_layout == "R")
{
return run_gemm_example_with_layouts<GemmConfig, APrecType, BPrecType, CPrecType>(
argc, argv, Row{}, Row{}, Row{});
arg_parser, Row{}, Row{}, Row{});
}
else if(a_layout == "R" && b_layout == "C")
{
return run_gemm_example_with_layouts<GemmConfig, APrecType, BPrecType, CPrecType>(
argc, argv, Row{}, Col{}, Row{});
arg_parser, Row{}, Col{}, Row{});
}
else if(a_layout == "C" && b_layout == "R")
{
return run_gemm_example_with_layouts<GemmConfig, APrecType, BPrecType, CPrecType>(
argc, argv, Col{}, Row{}, Row{});
arg_parser, Col{}, Row{}, Row{});
}
else if(a_layout == "C" && b_layout == "C")
{
return run_gemm_example_with_layouts<GemmConfig, APrecType, BPrecType, CPrecType>(
argc, argv, Col{}, Col{}, Row{});
arg_parser, Col{}, Col{}, Row{});
}
else
{
@@ -277,12 +272,8 @@ int run_gemm_example_prec_type(std::string a_layout, std::string b_layout, int a
}
template <template <typename PreType> typename GemmConfig>
int run_gemm_example(int argc, char* argv[])
int run_gemm_example(ck_tile::ArgParser& arg_parser)
{
auto [result, arg_parser] = create_args(argc, argv);
if(!result)
return -1;
std::string data_type = arg_parser.get_str("prec");
std::string a_layout = arg_parser.get_str("a_layout");
std::string b_layout = arg_parser.get_str("b_layout");
@@ -290,33 +281,33 @@ int run_gemm_example(int argc, char* argv[])
if(data_type == "fp16")
{
return run_gemm_example_prec_type<GemmConfig<ck_tile::half_t>, ck_tile::half_t>(
a_layout, b_layout, argc, argv);
a_layout, b_layout, arg_parser);
}
else if(data_type == "bf16")
{
return run_gemm_example_prec_type<GemmConfig<ck_tile::half_t>, ck_tile::bf16_t>(
a_layout, b_layout, argc, argv);
a_layout, b_layout, arg_parser);
}
else if(data_type == "fp8")
{
return run_gemm_example_prec_type<GemmConfig<ck_tile::fp8_t>,
ck_tile::fp8_t,
ck_tile::fp8_t,
ck_tile::half_t>(a_layout, b_layout, argc, argv);
ck_tile::half_t>(a_layout, b_layout, arg_parser);
}
else if(data_type == "bf8")
{
return run_gemm_example_prec_type<GemmConfig<ck_tile::bf8_t>,
ck_tile::bf8_t,
ck_tile::bf8_t,
ck_tile::half_t>(a_layout, b_layout, argc, argv);
ck_tile::half_t>(a_layout, b_layout, arg_parser);
}
else if(data_type == "int8")
{
return run_gemm_example_prec_type<GemmConfig<ck_tile::int8_t>,
ck_tile::int8_t,
ck_tile::int8_t,
ck_tile::int32_t>(a_layout, b_layout, argc, argv);
ck_tile::int32_t>(a_layout, b_layout, arg_parser);
}
else if(data_type == "pk_int4_t")
{
@@ -326,7 +317,7 @@ int run_gemm_example(int argc, char* argv[])
return run_gemm_example_prec_type<GemmConfig<ck_tile::half_t>,
ck_tile::half_t,
ck_tile::pk_int4_t,
ck_tile::half_t>(a_layout, b_layout, argc, argv);
ck_tile::half_t>(a_layout, b_layout, arg_parser);
}
else
{
@@ -341,9 +332,13 @@ int run_gemm_example(int argc, char* argv[])
int main(int argc, char* argv[])
{
auto [result, arg_parser] = create_args(argc, argv);
if(!result)
return -1;
try
{
return !run_gemm_example<GemmConfigComputeV3>(argc, argv);
return !run_gemm_example<GemmConfigComputeV3>(arg_parser);
}
catch(const std::runtime_error& e)
{