WMMA support for GEMM reduce (#2823)

Added gemm + reduce instance library for RDNA4. This includes:

- New device implementation running GEMM and reduction kernel
- instances for wmma (xdl parity)
- examples for wmma (xdl parity)
- tests for existing xdl and wmma

[ROCm/composable_kernel commit: b25d4d684a]
This commit is contained in:
Wojciech Laskowski
2025-09-12 21:36:43 +02:00
committed by GitHub
parent 8c0cdebe63
commit f2edb06bb0
27 changed files with 1911 additions and 89 deletions

View File

@@ -3,6 +3,11 @@
#pragma once
#if !defined(__HIPCC_RTC__) || !defined(CK_CODE_GEN_RTC)
#include <iostream>
#include <ostream>
#endif
#include "ck/utility/env.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/tensor_description/multi_index_transform_helper.hpp"
@@ -1049,6 +1054,13 @@ struct GridwiseGemm_wmma_cshuffle_v3_base
{
if(num_k_loop <= BlockwiseGemmPipe::PrefetchStages)
{
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
std::cout << "Pipeline validation failed: num_k_loop (" << num_k_loop
<< ") <= PrefetchStages (" << BlockwiseGemmPipe::PrefetchStages
<< ") for pipeline version != v1." << __FILE__ << ":" << __LINE__
<< ", in function: " << __func__ << std::endl;
}
return false;
}
}