mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 01:10:17 +00:00
Split the gemm_multi_abd instances. (#1306)
* split the gemm_multi_abd instances
* update the dates
This commit is contained in:
@@ -2,9 +2,14 @@
|
||||
# Collects the gemm_multi_abd instance source files into GEMM_MULTI_ABD_INSTANCES.
# The variable is first reset with an empty set(), then every instance .cpp is
# appended. The first group holds the bf16_i8_bf16 variants (plain, bias, gelu,
# bias_gelu for mk_kn_mn and mk_nk_mn); the second group holds the "multiply"
# variants. NOTE(review): the consumer of this variable is outside this hunk.
set(GEMM_MULTI_ABD_INSTANCES)
|
||||
|
||||
list(APPEND GEMM_MULTI_ABD_INSTANCES
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_v1_instance.cpp
|
||||
device_gemm_xdl_multi_abd_bias_bf16_i8_bf16_mk_kn_mn_v1_instance.cpp
|
||||
device_gemm_xdl_multi_abd_gelu_bf16_i8_bf16_mk_kn_mn_v1_instance.cpp
|
||||
device_gemm_xdl_multi_abd_bias_gelu_bf16_i8_bf16_mk_kn_mn_v1_instance.cpp
|
||||
device_gemm_xdl_multi_abd_bias_gelu_bf16_i8_bf16_mk_nk_mn_v1_instance.cpp
|
||||
|
||||
device_gemm_xdl_multi_abd_multiply_bf16_i8_bf16_mk_kn_mn_v1_instance.cpp
|
||||
device_gemm_xdl_multi_abd_multiply_bias_bf16_i8_bf16_mk_kn_mn_v1_instance.cpp
|
||||
device_gemm_xdl_multi_abd_multiply_gelu_bf16_i8_bf16_mk_kn_mn_v1_instance.cpp
|
||||
device_gemm_xdl_multi_abd_multiply_bias_gelu_bf16_i8_bf16_mk_kn_mn_v1_instance.cpp
|
||||
)
|
||||
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp"
|
||||
|
||||
#include "device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_common.hpp"
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
void add_device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_v1_instances(
|
||||
std::vector<std::unique_ptr<DeviceGemmMultipleABD<AsLayout,
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<>,
|
||||
ELayout,
|
||||
AsDataType,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<>,
|
||||
EDataType,
|
||||
AElementOp,
|
||||
Multiply,
|
||||
PassThrough>>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_comp_instances<
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<>,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<>,
|
||||
Multiply,
|
||||
PassThrough,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
add_device_operation_instances(instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_mem_instances<
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<>,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<>,
|
||||
Multiply,
|
||||
PassThrough,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,58 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp"
|
||||
|
||||
#include "device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_common.hpp"
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
void add_device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_bias_v1_instances(
|
||||
std::vector<std::unique_ptr<DeviceGemmMultipleABD<AsLayout,
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<D0Layout>,
|
||||
ELayout,
|
||||
AsDataType,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<D0DataType>,
|
||||
EDataType,
|
||||
AElementOp,
|
||||
Multiply,
|
||||
Add>>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_comp_instances<
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<D0Layout>,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<D0DataType>,
|
||||
Multiply,
|
||||
Add,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
add_device_operation_instances(instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_mem_instances<
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<D0Layout>,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<D0DataType>,
|
||||
Multiply,
|
||||
Add,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -1,5 +1,5 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
@@ -52,112 +52,6 @@ void add_device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_bias_gelu_v1_instances(
|
||||
Interwave>{});
|
||||
}
|
||||
|
||||
void add_device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_bias_v1_instances(
|
||||
std::vector<std::unique_ptr<DeviceGemmMultipleABD<AsLayout,
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<D0Layout>,
|
||||
ELayout,
|
||||
AsDataType,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<D0DataType>,
|
||||
EDataType,
|
||||
AElementOp,
|
||||
Multiply,
|
||||
Add>>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_comp_instances<
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<D0Layout>,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<D0DataType>,
|
||||
Multiply,
|
||||
Add,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
add_device_operation_instances(instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_mem_instances<
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<D0Layout>,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<D0DataType>,
|
||||
Multiply,
|
||||
Add,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
}
|
||||
|
||||
void add_device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_v1_instances(
|
||||
std::vector<std::unique_ptr<DeviceGemmMultipleABD<AsLayout,
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<>,
|
||||
ELayout,
|
||||
AsDataType,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<>,
|
||||
EDataType,
|
||||
AElementOp,
|
||||
Multiply,
|
||||
PassThrough>>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_comp_instances<
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<>,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<>,
|
||||
Multiply,
|
||||
PassThrough,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
add_device_operation_instances(instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_mem_instances<
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<>,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<>,
|
||||
Multiply,
|
||||
PassThrough,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
}
|
||||
|
||||
void add_device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_gelu_v1_instances(
|
||||
std::vector<std::unique_ptr<DeviceGemmMultipleABD<AsLayout,
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<>,
|
||||
ELayout,
|
||||
AsDataType,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<>,
|
||||
EDataType,
|
||||
AElementOp,
|
||||
Multiply,
|
||||
FastGelu>>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_comp_instances<
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<>,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<>,
|
||||
Multiply,
|
||||
FastGelu,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
|
||||
add_device_operation_instances(instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_mem_instances<
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<>,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<>,
|
||||
Multiply,
|
||||
FastGelu,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp"
|
||||
|
||||
#include "device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_common.hpp"
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
void add_device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_gelu_v1_instances(
|
||||
std::vector<std::unique_ptr<DeviceGemmMultipleABD<AsLayout,
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<>,
|
||||
ELayout,
|
||||
AsDataType,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<>,
|
||||
EDataType,
|
||||
AElementOp,
|
||||
Multiply,
|
||||
FastGelu>>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_comp_instances<
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<>,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<>,
|
||||
Multiply,
|
||||
FastGelu,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
|
||||
add_device_operation_instances(instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_mem_instances<
|
||||
ck::Tuple<B0Layout, B1Layout>,
|
||||
ck::Tuple<>,
|
||||
ck::Tuple<B0DataType, B1DataType>,
|
||||
ck::Tuple<>,
|
||||
Multiply,
|
||||
FastGelu,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,58 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp"
|
||||
|
||||
#include "device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_common.hpp"
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
void add_device_gemm_xdl_multi_abd_multiply_bf16_i8_bf16_mk_kn_mn_v1_instances(
|
||||
std::vector<std::unique_ptr<DeviceGemmMultipleABD<AsLayout,
|
||||
ck::Tuple<B0Layout>,
|
||||
ck::Tuple<B1Layout>,
|
||||
ELayout,
|
||||
AsDataType,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<B1DataType>,
|
||||
EDataType,
|
||||
AElementOp,
|
||||
PassThrough,
|
||||
Multiply>>>& instances)
|
||||
{
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_comp_instances<ck::Tuple<B0Layout>,
|
||||
ck::Tuple<B1Layout>,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<B1DataType>,
|
||||
PassThrough,
|
||||
Multiply,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_mem_instances<ck::Tuple<B0Layout>,
|
||||
ck::Tuple<B1Layout>,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<B1DataType>,
|
||||
PassThrough,
|
||||
Multiply,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,58 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp"
|
||||
|
||||
#include "device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_common.hpp"
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
void add_device_gemm_xdl_multi_abd_multiply_bf16_i8_bf16_mk_kn_mn_bias_v1_instances(
|
||||
std::vector<std::unique_ptr<DeviceGemmMultipleABD<AsLayout,
|
||||
ck::Tuple<B0Layout>,
|
||||
ck::Tuple<D0Layout, B1Layout>,
|
||||
ELayout,
|
||||
AsDataType,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<D0DataType, B1DataType>,
|
||||
EDataType,
|
||||
AElementOp,
|
||||
PassThrough,
|
||||
MultiplyAdd>>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_comp_instances<
|
||||
ck::Tuple<B0Layout>,
|
||||
ck::Tuple<D0Layout, B1Layout>,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<D0DataType, B1DataType>,
|
||||
PassThrough,
|
||||
MultiplyAdd,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
add_device_operation_instances(instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_mem_instances<
|
||||
ck::Tuple<B0Layout>,
|
||||
ck::Tuple<D0Layout, B1Layout>,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<D0DataType, B1DataType>,
|
||||
PassThrough,
|
||||
MultiplyAdd,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -1,5 +1,5 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
@@ -52,111 +52,6 @@ void add_device_gemm_xdl_multi_abd_multiply_bf16_i8_bf16_mk_kn_mn_bias_gelu_v1_i
|
||||
Interwave>{});
|
||||
}
|
||||
|
||||
void add_device_gemm_xdl_multi_abd_multiply_bf16_i8_bf16_mk_kn_mn_bias_v1_instances(
|
||||
std::vector<std::unique_ptr<DeviceGemmMultipleABD<AsLayout,
|
||||
ck::Tuple<B0Layout>,
|
||||
ck::Tuple<D0Layout, B1Layout>,
|
||||
ELayout,
|
||||
AsDataType,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<D0DataType, B1DataType>,
|
||||
EDataType,
|
||||
AElementOp,
|
||||
PassThrough,
|
||||
MultiplyAdd>>>& instances)
|
||||
{
|
||||
add_device_operation_instances(instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_comp_instances<
|
||||
ck::Tuple<B0Layout>,
|
||||
ck::Tuple<D0Layout, B1Layout>,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<D0DataType, B1DataType>,
|
||||
PassThrough,
|
||||
MultiplyAdd,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
add_device_operation_instances(instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_mem_instances<
|
||||
ck::Tuple<B0Layout>,
|
||||
ck::Tuple<D0Layout, B1Layout>,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<D0DataType, B1DataType>,
|
||||
PassThrough,
|
||||
MultiplyAdd,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
}
|
||||
|
||||
void add_device_gemm_xdl_multi_abd_multiply_bf16_i8_bf16_mk_kn_mn_v1_instances(
|
||||
std::vector<std::unique_ptr<DeviceGemmMultipleABD<AsLayout,
|
||||
ck::Tuple<B0Layout>,
|
||||
ck::Tuple<B1Layout>,
|
||||
ELayout,
|
||||
AsDataType,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<B1DataType>,
|
||||
EDataType,
|
||||
AElementOp,
|
||||
PassThrough,
|
||||
Multiply>>>& instances)
|
||||
{
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_comp_instances<ck::Tuple<B0Layout>,
|
||||
ck::Tuple<B1Layout>,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<B1DataType>,
|
||||
PassThrough,
|
||||
Multiply,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_mem_instances<ck::Tuple<B0Layout>,
|
||||
ck::Tuple<B1Layout>,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<B1DataType>,
|
||||
PassThrough,
|
||||
Multiply,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
}
|
||||
|
||||
void add_device_gemm_xdl_multi_abd_multiply_bf16_i8_bf16_mk_kn_mn_gelu_v1_instances(
|
||||
std::vector<std::unique_ptr<DeviceGemmMultipleABD<AsLayout,
|
||||
ck::Tuple<B0Layout>,
|
||||
ck::Tuple<B1Layout>,
|
||||
ELayout,
|
||||
AsDataType,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<B1DataType>,
|
||||
EDataType,
|
||||
AElementOp,
|
||||
PassThrough,
|
||||
MultiplyFastGelu>>>& instances)
|
||||
{
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_comp_instances<ck::Tuple<B0Layout>,
|
||||
ck::Tuple<B1Layout>,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<B1DataType>,
|
||||
PassThrough,
|
||||
MultiplyFastGelu,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_mem_instances<ck::Tuple<B0Layout>,
|
||||
ck::Tuple<B1Layout>,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<B1DataType>,
|
||||
PassThrough,
|
||||
MultiplyFastGelu,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp"
|
||||
|
||||
#include "device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_common.hpp"
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
void add_device_gemm_xdl_multi_abd_multiply_bf16_i8_bf16_mk_kn_mn_gelu_v1_instances(
|
||||
std::vector<std::unique_ptr<DeviceGemmMultipleABD<AsLayout,
|
||||
ck::Tuple<B0Layout>,
|
||||
ck::Tuple<B1Layout>,
|
||||
ELayout,
|
||||
AsDataType,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<B1DataType>,
|
||||
EDataType,
|
||||
AElementOp,
|
||||
PassThrough,
|
||||
MultiplyFastGelu>>>& instances)
|
||||
{
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_comp_instances<ck::Tuple<B0Layout>,
|
||||
ck::Tuple<B1Layout>,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<B1DataType>,
|
||||
PassThrough,
|
||||
MultiplyFastGelu,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
add_device_operation_instances(
|
||||
instances,
|
||||
device_gemm_xdl_multi_abd_bf16_i8_bf16_mk_kn_mn_mem_instances<ck::Tuple<B0Layout>,
|
||||
ck::Tuple<B1Layout>,
|
||||
ck::Tuple<B0DataType>,
|
||||
ck::Tuple<B1DataType>,
|
||||
PassThrough,
|
||||
MultiplyFastGelu,
|
||||
GemmMNKPadding,
|
||||
Interwave>{});
|
||||
}
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
Reference in New Issue
Block a user