Mirror of https://github.com/ROCm/composable_kernel.git (last synced 2026-05-18 20:09:25 +00:00)
Extend XDL kernel to Support RDNA3/4 - Part 4 (#2724)
* Fix example
* fix build error
* update pk_i4 & moe test case
* fix all instance build (examples)
* fix batched_gemm_gemm (example)
* disable example_gemm_bias_softmax_gemm_permute on gfx11
* remove unnecessary disable gfx11
* update tests
* update tests2
[ROCm/composable_kernel commit: 321627aec5]
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
@@ -199,9 +199,10 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
|
||||
return true;
|
||||
}
|
||||
|
||||
if(!(ck::get_device_name() == "gfx942" || ck::get_device_name() == "gfx950"))
|
||||
if(!(ck::get_device_name() == "gfx942" || ck::get_device_name() == "gfx950" ||
|
||||
ck::is_gfx11_supported() || ck::is_gfx12_supported()))
|
||||
{
|
||||
std::cout << "This kernel support gfx942 and gfx950 only" << std::endl;
|
||||
std::cout << "This kernel support gfx942, gfx950, gfx11 and gfx12 only" << std::endl;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
@@ -249,9 +249,10 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
|
||||
return true;
|
||||
}
|
||||
|
||||
if(!(ck::get_device_name() == "gfx942" || ck::get_device_name() == "gfx950"))
|
||||
if(!(ck::get_device_name() == "gfx942" || ck::get_device_name() == "gfx950" ||
|
||||
ck::is_gfx11_supported() || ck::is_gfx12_supported()))
|
||||
{
|
||||
std::cout << "This kernel support gfx942 and gfx950 only" << std::endl;
|
||||
std::cout << "This kernel support gfx942, gfx950, gfx11 and gfx12 only" << std::endl;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
@@ -38,14 +38,14 @@ using DeviceGemmV2Instance =
|
||||
AElementOp, BElementOp, CElementOp, GemmDefault,
|
||||
256, Scale_Block_N, Scale_Block_K,
|
||||
128, 128,
|
||||
KPerBlock, 8, 32,
|
||||
32, 32,
|
||||
4, 1,
|
||||
KPerBlock, 8, 16,
|
||||
16, 16,
|
||||
8, 2,
|
||||
S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>,
|
||||
2, 8, 8, 0,
|
||||
S<2, 128, 1>, S<1, 0, 2>, S<1, 0, 2>,
|
||||
2, 32, 32, 0,
|
||||
1, 1, S<1, 32, 1, 8>, 8,
|
||||
2, 16, 16, 0,
|
||||
1, 1, S<1, 16, 1, 16>, 4,
|
||||
ck::BlockGemmPipelineScheduler::Intrawave, ck::BlockGemmPipelineVersion::v3, CDataType, CDataType, PermuteA, PermuteB>;
|
||||
|
||||
// clang-format on
|
||||
@@ -281,9 +281,10 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
|
||||
return true;
|
||||
}
|
||||
|
||||
if(!(ck::get_device_name() == "gfx942" || ck::get_device_name() == "gfx950"))
|
||||
if(!(ck::get_device_name() == "gfx942" || ck::get_device_name() == "gfx950" ||
|
||||
ck::is_gfx11_supported() || ck::is_gfx12_supported()))
|
||||
{
|
||||
std::cout << "This kernel support gfx942 and gfx950 only" << std::endl;
|
||||
std::cout << "This kernel support gfx942, gfx950, gfx11 and gfx12 only" << std::endl;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecializa
|
||||
|
||||
static constexpr bool PermuteA = false;
|
||||
static constexpr bool PermuteB = false;
|
||||
|
||||
static constexpr int KPack = 32; // int4 -> 32, fp8 -> 16, fp16 -> 8
|
||||
// clang-format off
|
||||
#if 0
|
||||
using DeviceGemmV2Instance =
|
||||
@@ -56,14 +56,14 @@ using DeviceGemmV2Instance =
|
||||
AElementOp, BElementOp, CElementOp, GemmDefault,
|
||||
256,
|
||||
256, 256,
|
||||
128, 16, 32,
|
||||
32, 32,
|
||||
4, 4,
|
||||
128, 16, KPack,
|
||||
16, 16,
|
||||
8, 8,
|
||||
S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>,
|
||||
2, 16, 16, 0,
|
||||
S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>,
|
||||
2, 32, 32, 0,
|
||||
1, 1, S<1, 32, 1, 8>, 8,
|
||||
1, 1, S<1, 32, 1, 8>, 4,
|
||||
ck::BlockGemmPipelineScheduler::Intrawave, ck::BlockGemmPipelineVersion::v3, F8, F8, PermuteA, PermuteB>;
|
||||
|
||||
#endif
|
||||
@@ -160,7 +160,6 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
|
||||
auto gemm = DeviceGemmV2Instance{};
|
||||
|
||||
// weight pre-shuffle
|
||||
int KPack = 32; // int4 -> 32, fp8 -> 16, fp16 -> 8
|
||||
int NLane = gemm.GetPreShuffleParameters();
|
||||
int KLane = 64 / NLane;
|
||||
|
||||
@@ -269,9 +268,10 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
|
||||
return true;
|
||||
}
|
||||
|
||||
if(!(ck::get_device_name() == "gfx942" || ck::get_device_name() == "gfx950"))
|
||||
if(!(ck::get_device_name() == "gfx942" || ck::get_device_name() == "gfx950" ||
|
||||
ck::is_gfx12_supported()))
|
||||
{
|
||||
std::cout << "This kernel support gfx942 and gfx950 only" << std::endl;
|
||||
std::cout << "This kernel support gfx942, gfx950 and gfx12 only" << std::endl;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
@@ -38,14 +38,14 @@ using DeviceGemmV2Instance =
|
||||
AElementOp, BElementOp, CElementOp, GemmDefault,
|
||||
256,
|
||||
128, 128,
|
||||
KPerBlock, 16, 32,
|
||||
32, 32,
|
||||
2, 2,
|
||||
KPerBlock, 16, 16,
|
||||
16, 16,
|
||||
4, 4,
|
||||
S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>,
|
||||
2, 16, 16, 0,
|
||||
S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>,
|
||||
2, 32, 32, 0,
|
||||
1, 1, S<1, 32, 1, 8>, 8,
|
||||
2, 16, 16, 0,
|
||||
1, 1, S<1, 32, 1, 8>, 4,
|
||||
ck::BlockGemmPipelineScheduler::Interwave, ck::BlockGemmPipelineVersion::v2, ADataType, ADataType, PermuteA, PermuteB>;
|
||||
|
||||
// clang-format on
|
||||
@@ -247,9 +247,10 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
|
||||
return true;
|
||||
}
|
||||
|
||||
if(!(ck::get_device_name() == "gfx942" || ck::get_device_name() == "gfx950"))
|
||||
if(!(ck::get_device_name() == "gfx942" || ck::get_device_name() == "gfx950" ||
|
||||
ck::is_gfx12_supported()))
|
||||
{
|
||||
std::cout << "This kernel support gfx942 and gfx950 only" << std::endl;
|
||||
std::cout << "This kernel support gfx942, gfx950 and gfx12 only" << std::endl;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2023-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
@@ -36,7 +36,7 @@ using DeviceGemmV2Instance =
|
||||
2, 16, 16, 0,
|
||||
S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>,
|
||||
2, 16, 16, 0,
|
||||
1, 2, S<1, 32, 1, 8>, 8,
|
||||
1, 2, S<1, 32, 1, 8>, 4,
|
||||
ck::BlockGemmPipelineScheduler::Intrawave,ck::BlockGemmPipelineVersion::v3, ck::f8_t>;
|
||||
// clang-format on
|
||||
|
||||
|
||||
Reference in New Issue
Block a user