Add new gemm multiply multiply instances on gfx950 (#3213)

This commit is contained in:
jefyang1
2025-11-14 10:20:41 -06:00
committed by GitHub
parent caadb896f1
commit d30babbd00
6 changed files with 62 additions and 3 deletions

View File

@@ -795,7 +795,7 @@ struct DeviceGemmMultiD_Xdl_CShuffle_V3 : public DeviceGemmMultipleDSplitK<ALayo
{BlockGemmPipelineVersion::v5, "v5"}};
// clang-format off
str << "DeviceGemmXdlUniversal"
str << "DeviceGemmMultiD_Xdl_CShuffle_V3"
<< "<"
<< getGemmSpecializationString(GemmSpec) << ", "
<< std::string(ALayout::name)[0]
@@ -817,7 +817,11 @@ struct DeviceGemmMultiD_Xdl_CShuffle_V3 : public DeviceGemmMultipleDSplitK<ALayo
<< "BlkGemmPipelineVersion: "
<< BlkGemmPipelineVersionToString[BlkGemmPipelineVer] << ", "
<< "BlkGemmPipelinePrefetchStages: "
<< GridwiseGemm64::BlockwiseGemmPipe::PrefetchStages;
<< GridwiseGemm64::BlockwiseGemmPipe::PrefetchStages << ", "
<< "AK1: "
<< AK1 << ", "
<< "BK1: "
<< BK1;
// clang-format on
return str.str();

View File

@@ -1145,6 +1145,22 @@ struct GridwiseGemmMultiD_xdl_cshuffle_v3
InMemoryDataOperationEnum CGlobalMemoryDataOperation_ = InMemoryDataOperationEnum::Set>
__device__ static bool constexpr IsValidCompilationParameter()
{
// Compile-time flag describing the target of the current compilation pass:
// is_gfx950_build is true only when the __gfx950__ macro is defined here.
enum struct Arch : bool
{
#ifdef __gfx950__
    is_gfx950_build = true
#else
    is_gfx950_build = false
#endif
};
// skip building instances with AK1 >= 32 or BK1 >= 32 when not compiling for gfx950
if constexpr((static_cast<bool>(Arch::is_gfx950_build) == false) &&
(AK1Number >= 32 || BK1Number >= 32))
{
return false;
}
constexpr bool valid = ck::tensor_operation::device::IsValidGemmCompilationParameter<
BlockSize,
MPerBlock,