mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
Add grouped gemm instances for RDNA4 (#3237)
* wip: grouped_gemm implementation based on wmma kernel + example for fp16 * chore: clean up grouped_gemm_wmma_splitk_fp16 example * chore: add cmake options to fully disable XDL or WMMA kernels * feat: add tests for grouped gemm wmma instances for f16 and bf16 (all layouts) * chore: add grouped gemm wmma bf16 example * refactor: reuse more code between instance factory functions * chore: turn test failure if not all batch sizes are supported into a warning * chore: made failing of test on unsupported instances conditional to not break old tests * chore: add log message to failure case where AK1/BK1/KBatch is too high for K value * fix: issue with new overloads of GridwiseGemm_wmma_cshuffle_v3::Run() * fix: stray comma after parameter list * fix: compilation issues on RDNA3 and tests failing due to unsupported problems still being run * chore: update copyright in header comments * nit: minor feedback * refactor: unified XDL / wmma tests * fix: properly disable FP8 instances when ONLY targeting gfx11 * refactor: add v3 suffix to grouped_gemm device struct name * fix: small typos in example code * fix: fully exclude xdl/wmma instances when using the corresponding cmake flags * chore: remove unused destructor and added pipeline support checks to remove unnecessary paths * fix: make sure to not add instance library to group if library was skipped * fix: make sure xdl grouped gemm doesn't fail the new test * fix: explicitly exclude test if no xdl/wmma support, as pattern matching fails in this case * fix: examples not working since dependent types and functions were moved to ck namespace in develop * fix: tests failing when compiling for just gfx11 due to trying to run unsupported instances * chore: replace/add copyright headers with new format
This commit is contained in:
@@ -42,10 +42,11 @@ bool profile_grouped_gemm_impl(int do_verification,
|
||||
const std::vector<int>& StrideAs,
|
||||
const std::vector<int>& StrideBs,
|
||||
const std::vector<int>& StrideCs,
|
||||
const std::vector<int>& kbatches = {},
|
||||
int n_warmup = 1,
|
||||
int n_iter = 10,
|
||||
int instance_index = -1)
|
||||
const std::vector<int>& kbatches = {},
|
||||
int n_warmup = 1,
|
||||
int n_iter = 10,
|
||||
int instance_index = -1,
|
||||
bool fail_if_no_supported_instance = false)
|
||||
{
|
||||
bool pass = true;
|
||||
// TODO: Fixme - we do not pass compute data type here but need it
|
||||
@@ -225,6 +226,7 @@ bool profile_grouped_gemm_impl(int do_verification,
|
||||
}
|
||||
}
|
||||
// profile device GEMM instances
|
||||
int instances_supporting_all_batch_sizes = 0;
|
||||
for(auto& gemm_ptr : op_ptrs)
|
||||
{
|
||||
auto argument_ptr =
|
||||
@@ -268,6 +270,7 @@ bool profile_grouped_gemm_impl(int do_verification,
|
||||
kbatch_list = kbatches;
|
||||
}
|
||||
|
||||
bool all_batch_sizes_supported = true;
|
||||
for(std::size_t j = 0; j < kbatch_list.size(); j++)
|
||||
{
|
||||
auto kbatch_curr = kbatch_list[j];
|
||||
@@ -367,10 +370,30 @@ bool profile_grouped_gemm_impl(int do_verification,
|
||||
}
|
||||
else
|
||||
{
|
||||
all_batch_sizes_supported = false;
|
||||
std::cout << "Instance: " << gemm_name << ", does not support this GEMM problem"
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// If all batch sizes were supported by this instance, the instance can be marked as
|
||||
// 'supported' for this problem
|
||||
if(all_batch_sizes_supported)
|
||||
{
|
||||
++instances_supporting_all_batch_sizes;
|
||||
}
|
||||
}
|
||||
|
||||
// Warn if not a single instance was supported
|
||||
if(instances_supporting_all_batch_sizes == 0)
|
||||
{
|
||||
std::cout << "Warning! No instance found that supported all of the batch sizes."
|
||||
<< std::endl;
|
||||
|
||||
if(fail_if_no_supported_instance)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if(time_kernel)
|
||||
@@ -384,6 +407,7 @@ bool profile_grouped_gemm_impl(int do_verification,
|
||||
std::cout << "grouped_gemm_instance (" << instance_index << "/" << num_kernel << "): Passed"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
return pass;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user