Grouped conv bwd wei explicit GEMM for odd C/K (#2306)

[ROCm/composable_kernel commit: 7a83f1d510]
This commit is contained in:
Bartłomiej Kocot
2025-06-10 11:17:12 +02:00
committed by GitHub
parent 7cb7aa8e75
commit dbec4063f1
20 changed files with 557 additions and 434 deletions

View File

@@ -2,25 +2,25 @@
set(GROUPED_CONVND_EXP_BWD_WEIGHT
# Explicit instances are common for 2d and 3d
explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_default_instance.cpp
explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_mkpadding_instance.cpp
explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_mnkpadding_instance.cpp
explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v1_default_instance.cpp
explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v1_mkpadding_instance.cpp
explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v2_kpadding_instance.cpp
explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_kpadding_instance.cpp
explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_mpadding_instance.cpp
explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v1_kpadding_instance.cpp
explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v1_mnkpadding_instance.cpp
explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v2_default_instance.cpp
explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v2_mkpadding_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_default_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_mkpadding_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v1_default_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v1_mkpadding_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v2_kpadding_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_kpadding_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_mpadding_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v1_kpadding_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v2_default_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v2_mkpadding_instance.cpp
explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v2_mnkpadding_instance.cpp
explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_odd_mn_instance.cpp
explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_odd_m_instance.cpp
explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_odd_n_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_default_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_mnkpadding_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v1_default_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v1_mnkpadding_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v2_default_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v2_mnkpadding_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_odd_mn_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_odd_n_instance.cpp
explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_f16_f16_f16_exp_odd_m_instance.cpp
)
add_instance_library(device_grouped_convnd_bwd_weight_instance ${GROUPED_CONVND_EXP_BWD_WEIGHT})

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_kpadding_instances(
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_mnkpadding_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<2,
NHWGC,
GKYXC,
@@ -32,10 +32,10 @@ void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_kpadding_insta
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_comp_instances<BF16, GemmKPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_comp_instances<BF16, GemmMNKPadding>>(instances);
}
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_kpadding_instances(
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_mnkpadding_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<3,
NDHWGC,
GKZYXC,
@@ -58,7 +58,7 @@ void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_kpadding_insta
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_comp_instances<BF16, GemmKPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_comp_instances<BF16, GemmMNKPadding>>(instances);
}
} // namespace instance

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v1_mkpadding_instances(
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v1_mnkpadding_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<2,
NHWGC,
GKYXC,
@@ -32,11 +32,11 @@ void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v1_mkpadding_in
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_mem_instances<BF16, Intrawave, GemmMKPadding>>(
device_gemm_xdl_universal_km_kn_mn_mem_instances<BF16, Intrawave, GemmMNKPadding>>(
instances);
}
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v1_mkpadding_instances(
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v1_mnkpadding_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<3,
NDHWGC,
GKZYXC,
@@ -59,7 +59,7 @@ void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v1_mkpadding_in
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_mem_instances<BF16, Intrawave, GemmMKPadding>>(
device_gemm_xdl_universal_km_kn_mn_mem_instances<BF16, Intrawave, GemmMNKPadding>>(
instances);
}

View File

@@ -1,67 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_exp_gemm_xdl_universal_km_kn_mn_instance.hpp"
#include "ck/host_utility/device_prop.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v2_kpadding_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<2,
NHWGC,
GKYXC,
NHWGK,
BF16,
BF16,
BF16,
PassThrough,
PassThrough,
PassThrough>>>& instances)
{
add_explicit_gemm_device_operation_instances<
2,
NHWGC,
GKYXC,
NHWGK,
BF16,
BF16,
BF16,
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_mem_instances<BF16, Interwave, GemmKPadding>>(instances);
}
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v2_kpadding_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<3,
NDHWGC,
GKZYXC,
NDHWGK,
BF16,
BF16,
BF16,
PassThrough,
PassThrough,
PassThrough>>>& instances)
{
add_explicit_gemm_device_operation_instances<
3,
NDHWGC,
GKZYXC,
NDHWGK,
BF16,
BF16,
BF16,
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_mem_instances<BF16, Interwave, GemmKPadding>>(instances);
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v2_mkpadding_instances(
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v2_mnkpadding_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<2,
NHWGC,
GKYXC,
@@ -32,11 +32,11 @@ void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v2_mkpadding_in
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_mem_instances<BF16, Interwave, GemmMKPadding>>(
device_gemm_xdl_universal_km_kn_mn_mem_instances<BF16, Interwave, GemmMNKPadding>>(
instances);
}
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v2_mkpadding_instances(
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v2_mnkpadding_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<3,
NDHWGC,
GKZYXC,
@@ -59,7 +59,7 @@ void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v2_mkpadding_in
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_mem_instances<BF16, Interwave, GemmMKPadding>>(
device_gemm_xdl_universal_km_kn_mn_mem_instances<BF16, Interwave, GemmMNKPadding>>(
instances);
}

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_mkpadding_instances(
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_odd_m_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<2,
NHWGC,
GKYXC,
@@ -32,10 +32,12 @@ void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_mkpadding_inst
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_comp_instances<BF16, GemmMKPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_irregular_odd_m_instances<BF16,
Intrawave,
GemmMNKPadding>>(instances);
}
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_mkpadding_instances(
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_odd_m_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<3,
NDHWGC,
GKZYXC,
@@ -58,7 +60,9 @@ void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_mkpadding_inst
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_comp_instances<BF16, GemmMKPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_irregular_odd_m_instances<BF16,
Intrawave,
GemmMNKPadding>>(instances);
}
} // namespace instance

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v1_kpadding_instances(
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_odd_mn_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<2,
NHWGC,
GKYXC,
@@ -32,10 +32,12 @@ void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v1_kpadding_ins
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_mem_instances<BF16, Intrawave, GemmKPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_irregular_odd_mn_instances<BF16,
Intrawave,
GemmMNKPadding>>(instances);
}
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v1_kpadding_instances(
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_odd_mn_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<3,
NDHWGC,
GKZYXC,
@@ -58,7 +60,9 @@ void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_mem_v1_kpadding_ins
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_mem_instances<BF16, Intrawave, GemmKPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_irregular_odd_mn_instances<BF16,
Intrawave,
GemmMNKPadding>>(instances);
}
} // namespace instance

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_mpadding_instances(
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_odd_n_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<2,
NHWGC,
GKYXC,
@@ -32,10 +32,11 @@ void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_mpadding_insta
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_comp_instances<BF16, GemmMPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_odd_n_instances<BF16, Intrawave, GemmMNKPadding>>(
instances);
}
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_mpadding_instances(
void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_odd_n_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<3,
NDHWGC,
GKZYXC,
@@ -58,7 +59,8 @@ void add_device_grouped_convnd_bwd_weight_bf16_bf16_bf16_exp_comp_mpadding_insta
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_comp_instances<BF16, GemmMPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_odd_n_instances<BF16, Intrawave, GemmMNKPadding>>(
instances);
}
} // namespace instance

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_mpadding_instances(
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_mnkpadding_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<2,
NHWGC,
GKYXC,
@@ -32,10 +32,10 @@ void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_mpadding_instance
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_comp_instances<F16, GemmMPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_comp_instances<F16, GemmMNKPadding>>(instances);
}
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_mpadding_instances(
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_mnkpadding_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<3,
NDHWGC,
GKZYXC,
@@ -58,7 +58,7 @@ void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_mpadding_instance
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_comp_instances<F16, GemmMPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_comp_instances<F16, GemmMNKPadding>>(instances);
}
} // namespace instance

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v1_mkpadding_instances(
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v1_mnkpadding_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<2,
NHWGC,
GKYXC,
@@ -32,10 +32,11 @@ void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v1_mkpadding_insta
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_mem_instances<F16, Intrawave, GemmMKPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_mem_instances<F16, Intrawave, GemmMNKPadding>>(
instances);
}
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v1_mkpadding_instances(
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v1_mnkpadding_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<3,
NDHWGC,
GKZYXC,
@@ -58,7 +59,8 @@ void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v1_mkpadding_insta
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_mem_instances<F16, Intrawave, GemmMKPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_mem_instances<F16, Intrawave, GemmMNKPadding>>(
instances);
}
} // namespace instance

View File

@@ -1,67 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_exp_gemm_xdl_universal_km_kn_mn_instance.hpp"
#include "ck/host_utility/device_prop.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v2_mkpadding_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<2,
NHWGC,
GKYXC,
NHWGK,
F16,
F16,
F16,
PassThrough,
PassThrough,
PassThrough>>>& instances)
{
add_explicit_gemm_device_operation_instances<
2,
NHWGC,
GKYXC,
NHWGK,
F16,
F16,
F16,
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_mem_instances<F16, Interwave, GemmMKPadding>>(instances);
}
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v2_mkpadding_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<3,
NDHWGC,
GKZYXC,
NDHWGK,
F16,
F16,
F16,
PassThrough,
PassThrough,
PassThrough>>>& instances)
{
add_explicit_gemm_device_operation_instances<
3,
NDHWGC,
GKZYXC,
NDHWGK,
F16,
F16,
F16,
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_mem_instances<F16, Interwave, GemmMKPadding>>(instances);
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v2_kpadding_instances(
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v2_mnkpadding_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<2,
NHWGC,
GKYXC,
@@ -32,10 +32,11 @@ void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v2_kpadding_instan
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_mem_instances<F16, Interwave, GemmKPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_mem_instances<F16, Interwave, GemmMNKPadding>>(
instances);
}
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v2_kpadding_instances(
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v2_mnkpadding_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<3,
NDHWGC,
GKZYXC,
@@ -58,7 +59,8 @@ void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v2_kpadding_instan
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_mem_instances<F16, Interwave, GemmKPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_mem_instances<F16, Interwave, GemmMNKPadding>>(
instances);
}
} // namespace instance

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_mkpadding_instances(
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_odd_m_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<2,
NHWGC,
GKYXC,
@@ -32,10 +32,12 @@ void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_mkpadding_instanc
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_comp_instances<F16, GemmMKPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_irregular_odd_m_instances<F16,
Intrawave,
GemmMNKPadding>>(instances);
}
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_mkpadding_instances(
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_odd_m_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<3,
NDHWGC,
GKZYXC,
@@ -58,7 +60,9 @@ void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_mkpadding_instanc
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_comp_instances<F16, GemmMKPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_irregular_odd_m_instances<F16,
Intrawave,
GemmMNKPadding>>(instances);
}
} // namespace instance

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v1_kpadding_instances(
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_odd_mn_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<2,
NHWGC,
GKYXC,
@@ -32,10 +32,12 @@ void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v1_kpadding_instan
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_mem_instances<F16, Intrawave, GemmKPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_irregular_odd_mn_instances<F16,
Intrawave,
GemmMNKPadding>>(instances);
}
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v1_kpadding_instances(
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_odd_mn_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<3,
NDHWGC,
GKZYXC,
@@ -58,7 +60,9 @@ void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_mem_v1_kpadding_instan
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_mem_instances<F16, Intrawave, GemmKPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_irregular_odd_mn_instances<F16,
Intrawave,
GemmMNKPadding>>(instances);
}
} // namespace instance

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_kpadding_instances(
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_odd_n_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<2,
NHWGC,
GKYXC,
@@ -32,10 +32,11 @@ void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_kpadding_instance
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_comp_instances<F16, GemmKPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_odd_n_instances<F16, Intrawave, GemmMNKPadding>>(
instances);
}
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_kpadding_instances(
void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_odd_n_instances(
std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<3,
NDHWGC,
GKZYXC,
@@ -58,7 +59,8 @@ void add_device_grouped_convnd_bwd_weight_f16_f16_f16_exp_comp_kpadding_instance
PassThrough,
PassThrough,
PassThrough,
device_gemm_xdl_universal_km_kn_mn_comp_instances<F16, GemmKPadding>>(instances);
device_gemm_xdl_universal_km_kn_mn_odd_n_instances<F16, Intrawave, GemmMNKPadding>>(
instances);
}
} // namespace instance