Change relu to clamp for grouped conv fwd instances (#2249)

This commit is contained in:
Bartłomiej Kocot
2025-05-29 00:51:25 +02:00
committed by GitHub
parent 6df1c56ad6
commit e7906dd644
34 changed files with 291 additions and 195 deletions

View File

@@ -0,0 +1,16 @@
# ONLY XDL_KERNELS
add_instance_library(device_grouped_conv2d_fwd_bias_clamp_instance
xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp
xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp
xdl/large_tensor/device_grouped_conv2d_fwd_bias_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp
xdl/merged_groups/device_grouped_conv2d_fwd_bias_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp
xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp
xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp
xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp
xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instance.cpp
xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp
)

View File

@@ -10,7 +10,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instances(
void add_device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
NHWGC,
GKYXC,
@@ -22,7 +22,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_
BF16,
PassThrough,
PassThrough,
AddRelu>>>& instances)
AddClamp>>>& instances)
{
if(ck::get_device_name() == "gfx950")
{
@@ -35,7 +35,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_
NHWGK,
ConvFwdDefault,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(
instances,
@@ -46,7 +46,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_
NHWGK,
ConvFwd1x1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(
instances,
@@ -57,7 +57,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_
NHWGK,
ConvFwd1x1S1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
}

View File

@@ -10,7 +10,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances(
void add_device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
NHWGC,
GKYXC,
@@ -22,7 +22,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_ins
BF16,
PassThrough,
PassThrough,
AddRelu>>>& instances)
AddClamp>>>& instances)
{
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances<2,
@@ -32,7 +32,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_ins
NHWGK,
ConvFwdDefault,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances<2,
@@ -42,7 +42,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_ins
NHWGK,
ConvFwd1x1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances<2,
@@ -52,7 +52,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_ins
NHWGK,
ConvFwd1x1S1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
} // namespace instance

View File

@@ -10,7 +10,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances(
void add_device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
NHWGC,
GKYXC,
@@ -22,7 +22,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_par
BF16,
PassThrough,
PassThrough,
AddRelu>>>& instances)
AddClamp>>>& instances)
{
if(ck::get_device_name() != "gfx950")
{
@@ -35,7 +35,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_par
NHWGK,
ConvFwdDefault,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(
instances,
@@ -46,7 +46,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_par
NHWGK,
ConvFwd1x1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(
instances,
@@ -57,7 +57,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_par
NHWGK,
ConvFwd1x1S1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
}

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances(
void add_device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
NHWGC,
GKYXC,
@@ -21,7 +21,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_in
BF16,
PassThrough,
PassThrough,
AddRelu>>>& instances)
AddClamp>>>& instances)
{
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_16x16_instances<2,
@@ -31,7 +31,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_in
NHWGK,
ConvFwdDefault,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_16x16_instances<2,
@@ -41,7 +41,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_in
NHWGK,
ConvFwd1x1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_16x16_instances<2,
@@ -51,7 +51,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_in
NHWGK,
ConvFwd1x1S1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
} // namespace instance

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_instances(
void add_device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
NHWGC,
GKYXC,
@@ -21,7 +21,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_instance
BF16,
PassThrough,
PassThrough,
AddRelu>>>& instances)
AddClamp>>>& instances)
{
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_instances<2,
@@ -31,7 +31,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_instance
NHWGK,
ConvFwdDefault,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_instances<2,
@@ -41,7 +41,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_instance
NHWGK,
ConvFwd1x1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_instances<2,
@@ -51,7 +51,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_instance
NHWGK,
ConvFwd1x1S1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
} // namespace instance

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_fwd_bias_relu_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances(
void add_device_grouped_conv2d_fwd_bias_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
NHWGC,
GKYXC,
@@ -21,7 +21,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_large_tensor_nhwgc_gkyxc_nhwgk_
BF16,
PassThrough,
PassThrough,
AddRelu>>>& instances)
AddClamp>>>& instances)
{
add_device_operation_instances(
instances,
@@ -32,7 +32,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_large_tensor_nhwgc_gkyxc_nhwgk_
NHWGK,
ConvFwdDefault,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
} // namespace instance

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances(
void add_device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
NHWGC,
GKYXC,
@@ -21,7 +21,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inte
BF16,
PassThrough,
PassThrough,
AddRelu>>>& instances)
AddClamp>>>& instances)
{
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<2,
@@ -32,7 +32,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inte
ConvFwdDefault,
Interwave,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<2,
@@ -43,7 +43,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inte
ConvFwd1x1P0,
Interwave,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<2,
@@ -54,7 +54,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inte
ConvFwd1x1S1P0,
Interwave,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
} // namespace instance

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances(
void add_device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
NHWGC,
GKYXC,
@@ -21,7 +21,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intr
BF16,
PassThrough,
PassThrough,
AddRelu>>>& instances)
AddClamp>>>& instances)
{
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<2,
@@ -32,7 +32,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intr
ConvFwdDefault,
Intrawave,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<2,
@@ -43,7 +43,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intr
ConvFwd1x1P0,
Intrawave,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<2,
@@ -54,7 +54,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intr
ConvFwd1x1S1P0,
Intrawave,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
} // namespace instance

View File

@@ -10,7 +10,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_fwd_bias_relu_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances(
void add_device_grouped_conv2d_fwd_bias_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
NHWGC,
GKYXC,
@@ -22,7 +22,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_merged_groups_nhwgc_gkyxc_nhwgk
BF16,
PassThrough,
PassThrough,
AddRelu>>>& instances)
AddClamp>>>& instances)
{
if(ck::get_device_name() == "gfx950")
{
@@ -35,7 +35,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_merged_groups_nhwgc_gkyxc_nhwgk
NHWGK,
ConvFwdDefault,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(
instances,
@@ -46,7 +46,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_merged_groups_nhwgc_gkyxc_nhwgk
NHWGK,
ConvFwd3x3,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
else
{
@@ -59,7 +59,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_merged_groups_nhwgc_gkyxc_nhwgk
NHWGK,
ConvFwdDefault,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(
instances,
@@ -70,7 +70,7 @@ void add_device_grouped_conv2d_fwd_bias_relu_xdl_merged_groups_nhwgc_gkyxc_nhwgk
NHWGK,
ConvFwd3x3,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
}

View File

@@ -1,16 +0,0 @@
# ONLY XDL_KERNELS
add_instance_library(device_grouped_conv2d_fwd_bias_relu_instance
xdl/device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp
xdl/device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp
xdl/large_tensor/device_grouped_conv2d_fwd_bias_relu_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp
xdl/merged_groups/device_grouped_conv2d_fwd_bias_relu_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp
xdl/mem/device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp
xdl/mem/device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp
xdl/comp/device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp
xdl/comp/device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instance.cpp
xdl/comp/device_grouped_conv2d_fwd_bias_relu_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp
)

View File

@@ -0,0 +1,16 @@
# ONLY XDL_KERNELS
set(GROUPED_CONV3D_FWD
xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp
xdl/large_tensor/device_grouped_conv3d_fwd_bias_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
xdl/merged_groups/device_grouped_conv3d_fwd_bias_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp
xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp
xdl/comp/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp
)
add_instance_library(device_grouped_conv3d_fwd_bias_clamp_instance ${GROUPED_CONV3D_FWD})

View File

@@ -10,7 +10,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances(
void add_device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NDHWGC,
GKZYXC,
@@ -22,7 +22,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_
BF16,
PassThrough,
PassThrough,
AddRelu>>>& instances)
AddClamp>>>& instances)
{
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances<3,
@@ -32,7 +32,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_
NDHWGK,
ConvFwdDefault,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances<3,
NDHWGC,
@@ -41,7 +41,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_
NDHWGK,
ConvFwd1x1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances<3,
NDHWGC,
@@ -50,7 +50,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_
NDHWGK,
ConvFwd1x1S1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
if(ck::get_device_name() != "gfx950")
{
@@ -63,7 +63,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_
NDHWGK,
ConvFwdDefault,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances_part2<3,
@@ -73,7 +73,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_
NDHWGK,
ConvFwd1x1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances_part2<3,
@@ -83,7 +83,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_
NDHWGK,
ConvFwd1x1S1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
if(ck::get_device_name() == "gfx950")
@@ -97,7 +97,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_
NDHWGK,
ConvFwdDefault,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances_2x<3,
@@ -107,7 +107,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_
NDHWGK,
ConvFwd1x1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances_2x<3,
@@ -117,7 +117,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_
NDHWGK,
ConvFwd1x1S1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
}

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances(
void add_device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NDHWGC,
GKZYXC,
@@ -21,7 +21,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16
BF16,
PassThrough,
PassThrough,
AddRelu>>>& instances)
AddClamp>>>& instances)
{
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_16x16_instances<3,
@@ -31,7 +31,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16
NDHWGK,
ConvFwdDefault,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_16x16_instances<3,
NDHWGC,
@@ -40,7 +40,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16
NDHWGK,
ConvFwd1x1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_16x16_instances<3,
NDHWGC,
@@ -49,7 +49,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16
NDHWGK,
ConvFwd1x1S1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
} // namespace instance

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
void add_device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NDHWGC,
GKZYXC,
@@ -21,7 +21,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_insta
BF16,
PassThrough,
PassThrough,
AddRelu>>>& instances)
AddClamp>>>& instances)
{
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_instances<3,
@@ -31,7 +31,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_insta
NDHWGK,
ConvFwdDefault,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_instances<3,
NDHWGC,
@@ -40,7 +40,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_insta
NDHWGK,
ConvFwd1x1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_instances<3,
NDHWGC,
@@ -49,7 +49,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_insta
NDHWGK,
ConvFwd1x1S1P0,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
} // namespace instance

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_conv3d_fwd_bias_relu_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
void add_device_grouped_conv3d_fwd_bias_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NDHWGC,
GKZYXC,
@@ -21,7 +21,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_large_tensor_ndhwgc_gkzyxc_ndhw
BF16,
PassThrough,
PassThrough,
AddRelu>>>& instances)
AddClamp>>>& instances)
{
add_device_operation_instances(
instances,
@@ -32,7 +32,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_large_tensor_ndhwgc_gkzyxc_ndhw
NDHWGK,
ConvFwdDefault,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
} // namespace instance

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances(
void add_device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NDHWGC,
GKZYXC,
@@ -21,7 +21,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_i
BF16,
PassThrough,
PassThrough,
AddRelu>>>& instances)
AddClamp>>>& instances)
{
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
@@ -32,7 +32,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_i
ConvFwdDefault,
Interwave,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
NDHWGC,
@@ -42,7 +42,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_i
ConvFwd1x1P0,
Interwave,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
NDHWGC,
@@ -52,7 +52,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_i
ConvFwd1x1S1P0,
Interwave,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
} // namespace instance

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances(
void add_device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NDHWGC,
GKZYXC,
@@ -21,7 +21,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_i
BF16,
PassThrough,
PassThrough,
AddRelu>>>& instances)
AddClamp>>>& instances)
{
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
@@ -32,7 +32,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_i
ConvFwdDefault,
Intrawave,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
NDHWGC,
@@ -42,7 +42,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_i
ConvFwd1x1P0,
Intrawave,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
NDHWGC,
@@ -52,7 +52,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_i
ConvFwd1x1S1P0,
Intrawave,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
} // namespace instance

View File

@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_conv3d_fwd_bias_relu_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
void add_device_grouped_conv3d_fwd_bias_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NDHWGC,
GKZYXC,
@@ -21,7 +21,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_merged_groups_ndhwgc_gkzyxc_ndh
BF16,
PassThrough,
PassThrough,
AddRelu>>>& instances)
AddClamp>>>& instances)
{
add_device_operation_instances(
instances,
@@ -32,7 +32,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_merged_groups_ndhwgc_gkzyxc_ndh
NDHWGK,
ConvFwdDefault,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_merged_groups_bf16_instances<3,
@@ -42,7 +42,7 @@ void add_device_grouped_conv3d_fwd_bias_relu_xdl_merged_groups_ndhwgc_gkzyxc_ndh
NDHWGK,
ConvFwd3x3,
Tuple<BF16>,
AddRelu>{});
AddClamp>{});
}
} // namespace instance

View File

@@ -1,16 +0,0 @@
# ONLY XDL_KERNELS
set(GROUPED_CONV3D_FWD
xdl/device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
xdl/device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp
xdl/large_tensor/device_grouped_conv3d_fwd_bias_relu_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
xdl/merged_groups/device_grouped_conv3d_fwd_bias_relu_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp
xdl/mem/device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp
xdl/mem/device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp
xdl/comp/device_grouped_conv3d_fwd_bias_relu_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp
)
add_instance_library(device_grouped_conv3d_fwd_bias_relu_instance ${GROUPED_CONV3D_FWD})