mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 14:59:17 +00:00
Stream-K Reduction option as Runtime parameter and Compilation Error Fix (SK-Reduction) (#2145)
* reduction is passed as runtime parameter
* clang
* Update include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_streamk_v3.hpp
  Co-authored-by: John Afaganis <john.afaganis@amd.com>
* Update include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
* remove comment
---------
This commit is contained in:
committed by
GitHub
parent
06e0b8436c
commit
6fad1c4874
@@ -15,6 +15,8 @@
|
||||
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
|
||||
#include "ck/utility/data_type.hpp"
|
||||
|
||||
#include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"
|
||||
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/utility/fill.hpp"
|
||||
@@ -57,8 +59,9 @@ struct ProblemSizeStreamK_universal final
|
||||
ck::index_t StrideB = -1;
|
||||
ck::index_t StrideC = -1;
|
||||
|
||||
ck::index_t Grid_size = -1; // defaults to max occupancy
|
||||
ck::index_t Streamk_sel = 1; // defaults to 1-tile SK
|
||||
ck::index_t Grid_size = -1; // defaults to max occupancy
|
||||
ck::index_t Streamk_sel = 1; // defaults to 1-tile SK
|
||||
ck::StreamKReductionStrategy reduction_strategy = ck::StreamKReductionStrategy::Atomic;
|
||||
};
|
||||
|
||||
struct ProblemSizeSplitK final
|
||||
@@ -173,7 +176,19 @@ bool parse_cmd_args<ProblemSizeStreamK_universal>(int argc,
|
||||
if(argc >= 11)
|
||||
{
|
||||
problem_size.Streamk_sel = std::stoi(argv[10]);
|
||||
problem_size.Grid_size = std::stoi(argv[11]);
|
||||
|
||||
if(argc >= 12)
|
||||
{
|
||||
problem_size.Grid_size = std::stoi(argv[11]);
|
||||
|
||||
if(argc >= 13)
|
||||
{
|
||||
int reduction_strategy = std::stoi(argv[12]);
|
||||
problem_size.reduction_strategy = reduction_strategy == 0
|
||||
? ck::StreamKReductionStrategy::Atomic
|
||||
: ck::StreamKReductionStrategy::Reduction;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -185,7 +200,9 @@ bool parse_cmd_args<ProblemSizeStreamK_universal>(int argc,
|
||||
<< "arg4 to 9: M (256x), N(128x), K(32x), StrideA, StrideB, StrideC (default: -1 or 0)"
|
||||
<< std::endl
|
||||
<< "arg10: stream-k select (-1: default config, 0: all DP, 1: 1-tile SK, 2: 2-tile SK)"
|
||||
<< "\narg11: Grid_size(-1 for max occupancy)" << std::endl;
|
||||
<< std::endl
|
||||
<< "arg11: Grid_size(-1 for max occupancy)" << std::endl
|
||||
<< "arg12: Reduction strategy (0: Atomic, 1: Reduction)" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -21,6 +21,16 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
|
||||
auto Grid_size = problem_size.Grid_size;
|
||||
auto Streamk_sel = problem_size.Streamk_sel;
|
||||
|
||||
auto reduction_strategy = problem_size.reduction_strategy;
|
||||
if(reduction_strategy == ck::StreamKReductionStrategy::Atomic)
|
||||
{
|
||||
std::cout << "Using Atomic reduction strategy" << std::endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Using Parallel reduction strategy" << std::endl;
|
||||
}
|
||||
|
||||
auto f_host_tensor_descriptor =
|
||||
[](std::size_t row, std::size_t col, std::size_t stride, auto layout) {
|
||||
if constexpr(std::is_same_v<decltype(layout), ck::tensor_layout::gemm::RowMajor>)
|
||||
@@ -152,7 +162,8 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
|
||||
Grid_size,
|
||||
a_element_op,
|
||||
b_element_op,
|
||||
c_element_op);
|
||||
c_element_op,
|
||||
reduction_strategy);
|
||||
|
||||
if(!gemm.IsSupportedArgument(argument))
|
||||
{
|
||||
@@ -242,7 +253,10 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
|
||||
float gb_per_sec = num_btype / 1.E6 / ave_time;
|
||||
|
||||
std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec
|
||||
<< " GB/s, " << gemm.GetTypeString() << std::endl;
|
||||
<< " GB/s, " << gemm.GetTypeString()
|
||||
<< (reduction_strategy == ck::StreamKReductionStrategy::Atomic ? " (Atomic)"
|
||||
: " (Reduction)")
|
||||
<< std::endl;
|
||||
}
|
||||
return pass;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user