Revert "Shard several of the most costly targets. (#2266)" (#2361)

This reverts commit 3a0cb27966.
This commit is contained in:
Illia Silin
2025-06-17 13:56:30 -07:00
committed by GitHub
parent cc98a41f46
commit cdfd7722bf
41 changed files with 820 additions and 1318 deletions

View File

@@ -1,5 +1,5 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
@@ -688,6 +688,7 @@ void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances(
PassThrough,
PassThrough,
PassThrough>>>& instances);
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_16x16_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NGCDHW,

View File

@@ -1,5 +1,5 @@
# XDL_DL_WMMA_KERNELS
set(GROUPED_CONV2D_FWD
add_instance_library(device_grouped_conv2d_fwd_instance
#xdl
# GNHWC, GKYXC, GNHWK
xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp
@@ -19,6 +19,8 @@ set(GROUPED_CONV2D_FWD
xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_f32_instance.cpp
xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkyxc_ngkhw_int8_instance.cpp
# NGCHW, GKCYX, NGKHW
xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instance.cpp
xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instance.cpp
xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f32_instance.cpp
xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_16x16_instance.cpp
xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_16x16_instance.cpp
@@ -44,10 +46,12 @@ set(GROUPED_CONV2D_FWD
xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp
xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instance.cpp
xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instance.cpp
xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instance.cpp
# NHWGC, GKYXC, NHWGK
xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp
xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instance.cpp
xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instance.cpp
xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instance.cpp
# NGCHW, GKCYX, NGKHW
xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_mem_intra_instance.cpp
xdl/mem/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_mem_intra_instance.cpp
@@ -67,6 +71,7 @@ set(GROUPED_CONV2D_FWD
xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp
xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instance.cpp
# NGCHW, GKCYX, NGKHW
xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instance.cpp
xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_comp_instance.cpp
xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f32_comp_instance.cpp
xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_2x_instance.cpp
@@ -100,47 +105,3 @@ set(GROUPED_CONV2D_FWD
wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp
wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp
)
# Add generated files for sharded instantiations.
include(ShardInstantiation)
set(GENERATED_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated)
generate_sharded_instantiations(
INSTANCES_NAME device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances
TEMPLATE_FILE xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instance.in
NUM_SHARDS 16
SRC_LIST GROUPED_CONV2D_FWD
OUTPUT_DIR ${GENERATED_DIR}/xdl
)
set(GENERATED_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated)
generate_sharded_instantiations(
INSTANCES_NAME device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances
TEMPLATE_FILE xdl/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instance.in
NUM_SHARDS 16
SRC_LIST GROUPED_CONV2D_FWD
OUTPUT_DIR ${GENERATED_DIR}/xdl
)
set(GENERATED_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated)
generate_sharded_instantiations(
INSTANCES_NAME device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances
TEMPLATE_FILE xdl/comp/device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instance.in
NUM_SHARDS 16
SRC_LIST GROUPED_CONV2D_FWD
OUTPUT_DIR ${GENERATED_DIR}/xdl/comp
)
set(GENERATED_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated)
generate_sharded_instantiations(
INSTANCES_NAME device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances
TEMPLATE_FILE xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instance.in
NUM_SHARDS 21
SRC_LIST GROUPED_CONV2D_FWD
OUTPUT_DIR ${GENERATED_DIR}/xdl/mem
)
set(GENERATED_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated)
generate_sharded_instantiations(
INSTANCES_NAME device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances
TEMPLATE_FILE xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instance.in
NUM_SHARDS 21
SRC_LIST GROUPED_CONV2D_FWD
OUTPUT_DIR ${GENERATED_DIR}/xdl/mem
)
add_instance_library(device_grouped_conv2d_fwd_instance ${GROUPED_CONV2D_FWD})

View File

@@ -1,14 +1,16 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_comp_instance.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/utility/filter_tuple.hpp"
namespace ck::tensor_operation::device::instance {
using device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances =
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
NGCHW,
GKCYX,
@@ -20,23 +22,19 @@ using device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances =
BF16,
PassThrough,
PassThrough,
PassThrough>>>;
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
template <int Shards, int ShardIndex>
void add_device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard([[maybe_unused]]
device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_comp_instances& instances)
PassThrough>>>& instances)
{
add_device_operation_instances(
instances,
ck::util::filter_tuple_by_modulo_t<device_grouped_conv_fwd_xdl_bf16_comp_instances<2,
NGCHW,
GKCYX,
Empty_Tuple,
NGKHW,
ConvFwdDefault>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_bf16_comp_instances<2,
NGCHW,
GKCYX,
Empty_Tuple,
NGKHW,
ConvFwdDefault>{});
}
} // namespace ck::tensor_operation::device::instance
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -3,11 +3,13 @@
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp"
#include "ck/utility/filter_tuple.hpp"
namespace ck::tensor_operation::device::instance {
using device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances =
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
NGCHW,
GKCYX,
@@ -19,40 +21,32 @@ using device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances =
BF16,
PassThrough,
PassThrough,
PassThrough>>>;
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
template <int Shards, int ShardIndex>
void add_device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances_shard(
device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_bf16_instances& instances)
PassThrough>>>& instances)
{
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<device_grouped_conv_fwd_xdl_bf16_instances<2,
device_grouped_conv_fwd_xdl_bf16_instances<2,
NGCHW,
GKCYX,
Empty_Tuple,
NGKHW,
ConvFwdDefault>,
Shards,
ShardIndex>{});
ConvFwdDefault>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<device_grouped_conv_fwd_xdl_bf16_instances<2,
device_grouped_conv_fwd_xdl_bf16_instances<2,
NGCHW,
GKCYX,
Empty_Tuple,
NGKHW,
ConvFwd1x1P0>,
Shards,
ShardIndex>{});
ConvFwd1x1P0>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<device_grouped_conv_fwd_xdl_bf16_instances<2,
device_grouped_conv_fwd_xdl_bf16_instances<2,
NGCHW,
GKCYX,
Empty_Tuple,
NGKHW,
ConvFwd1x1S1P0>,
Shards,
ShardIndex>{});
ConvFwd1x1S1P0>{});
}
} // namespace ck::tensor_operation::device::instance
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -3,11 +3,13 @@
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp"
#include "ck/utility/filter_tuple.hpp"
namespace ck::tensor_operation::device::instance {
using device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances =
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
NGCHW,
GKCYX,
@@ -19,40 +21,32 @@ using device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances =
F16,
PassThrough,
PassThrough,
PassThrough>>>;
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
template <int Shards, int ShardIndex>
void add_device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances_shard(
device_grouped_conv2d_fwd_xdl_ngchw_gkcyx_ngkhw_f16_instances& instances)
PassThrough>>>& instances)
{
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<device_grouped_conv_fwd_xdl_f16_instances<2,
NGCHW,
GKCYX,
Empty_Tuple,
NGKHW,
ConvFwdDefault>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_f16_instances<2,
NGCHW,
GKCYX,
Empty_Tuple,
NGKHW,
ConvFwdDefault>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<device_grouped_conv_fwd_xdl_f16_instances<2,
NGCHW,
GKCYX,
Empty_Tuple,
NGKHW,
ConvFwd1x1P0>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_f16_instances<2,
NGCHW,
GKCYX,
Empty_Tuple,
NGKHW,
ConvFwd1x1P0>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<device_grouped_conv_fwd_xdl_f16_instances<2,
NGCHW,
GKCYX,
Empty_Tuple,
NGKHW,
ConvFwd1x1S1P0>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_f16_instances<2,
NGCHW,
GKCYX,
Empty_Tuple,
NGKHW,
ConvFwd1x1S1P0>{});
}
} // namespace ck::tensor_operation::device::instance
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -1,62 +1,66 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/utility/filter_tuple.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
namespace ck::tensor_operation::device::instance {
using device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances =
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NGCDHW,
GKCZYX,
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
NHWGC,
GKYXC,
Empty_Tuple,
NGKDHW,
F16,
F16,
NHWGK,
int8_t,
int8_t,
Empty_Tuple,
F16,
int8_t,
PassThrough,
PassThrough,
PassThrough>>>;
template <int Shards, int ShardIndex>
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances_shard(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances& instances)
PassThrough>>>& instances)
{
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault,
Intrawave>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_int8_mem_instances<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
ConvFwdDefault,
Interwave>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0,
Intrawave>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_int8_mem_instances<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
ConvFwd1x1P0,
Interwave>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0,
Intrawave>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_int8_mem_instances<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
ConvFwd1x1S1P0,
Interwave>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_int8_mem_instances<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
ConvFwdOddC,
Interwave>{});
}
} // namespace ck::tensor_operation::device::instance
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -1,80 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
#include "ck/utility/filter_tuple.hpp"
namespace ck::tensor_operation::device::instance {
using device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances =
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
int8_t,
int8_t,
Empty_Tuple,
int8_t,
PassThrough,
PassThrough,
PassThrough>>>;
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
template <int Shards, int ShardIndex>
void add_device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard(
device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances& instances)
{
add_device_operation_instances(
instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_int8_mem_instances<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
ConvFwdDefault,
Interwave>,
Shards,
ShardIndex>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_int8_mem_instances<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
ConvFwd1x1P0,
Interwave>,
Shards,
ShardIndex>{});
add_device_operation_instances(
instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_int8_mem_instances<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
ConvFwd1x1S1P0,
Interwave>,
Shards,
ShardIndex>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_int8_mem_instances<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
ConvFwdOddC,
Interwave>,
Shards,
ShardIndex>{});
}
} // namespace ck::tensor_operation::device::instance

View File

@@ -1,62 +1,66 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/utility/filter_tuple.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
namespace ck::tensor_operation::device::instance {
using device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances =
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NGCDHW,
GKCZYX,
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
NHWGC,
GKYXC,
Empty_Tuple,
NGKDHW,
F32,
F32,
NHWGK,
int8_t,
int8_t,
Empty_Tuple,
F32,
int8_t,
PassThrough,
PassThrough,
PassThrough>>>;
template <int Shards, int ShardIndex>
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances_shard(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances& instances)
PassThrough>>>& instances)
{
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f32_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault,
Interwave>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_int8_mem_instances<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
ConvFwdDefault,
Intrawave>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f32_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0,
Interwave>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_int8_mem_instances<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
ConvFwd1x1P0,
Intrawave>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f32_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0,
Interwave>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_int8_mem_instances<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
ConvFwd1x1S1P0,
Intrawave>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_int8_mem_instances<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
ConvFwdOddC,
Intrawave>{});
}
} // namespace ck::tensor_operation::device::instance
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -1,80 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
#include "ck/utility/filter_tuple.hpp"
namespace ck::tensor_operation::device::instance {
using device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances =
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
int8_t,
int8_t,
Empty_Tuple,
int8_t,
PassThrough,
PassThrough,
PassThrough>>>;
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
template <int Shards, int ShardIndex>
void add_device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard(
device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances& instances)
{
add_device_operation_instances(
instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_int8_mem_instances<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
ConvFwdDefault,
Intrawave>,
Shards,
ShardIndex>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_int8_mem_instances<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
ConvFwd1x1P0,
Intrawave>,
Shards,
ShardIndex>{});
add_device_operation_instances(
instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_int8_mem_instances<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
ConvFwd1x1S1P0,
Intrawave>,
Shards,
ShardIndex>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_int8_mem_instances<2,
NHWGC,
GKYXC,
Empty_Tuple,
NHWGK,
ConvFwdOddC,
Intrawave>,
Shards,
ShardIndex>{});
}
} // namespace ck::tensor_operation::device::instance

View File

@@ -11,6 +11,8 @@ set(GROUPED_CONV3D_FWD
xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instance.cpp
xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instance.cpp
xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp
xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instance.cpp
xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instance.cpp
xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_instance.cpp
xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_16x16_instance.cpp
xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_16x16_instance.cpp
@@ -30,13 +32,23 @@ set(GROUPED_CONV3D_FWD
xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp
xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instance.cpp
xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instance.cpp
xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instance.cpp
xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instance.cpp
xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instance.cpp
xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp
xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instance.cpp
xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instance.cpp
xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instance.cpp
xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instance.cpp
xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instance.cpp
xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instance.cpp
xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_comp_instance.cpp
xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp
xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instance.cpp
xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instance.cpp
xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instance.cpp
xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instance.cpp
xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_comp_instance.cpp
xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_2x_instance.cpp
xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_2x_instance.cpp
xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_part2_instance.cpp
@@ -59,99 +71,6 @@ xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_comp_instance.cp
wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp
wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp
)
# Add generated files for sharded instantiations.
include(ShardInstantiation)
set(GENERATED_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated)
generate_sharded_instantiations(
INSTANCES_NAME device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances
TEMPLATE_FILE xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instance.in
NUM_SHARDS 8
SRC_LIST GROUPED_CONV3D_FWD
OUTPUT_DIR ${GENERATED_DIR}/xdl
)
set(GENERATED_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated)
generate_sharded_instantiations(
INSTANCES_NAME device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances
TEMPLATE_FILE xdl/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instance.in
NUM_SHARDS 8
SRC_LIST GROUPED_CONV3D_FWD
OUTPUT_DIR ${GENERATED_DIR}/xdl
)
set(GENERATED_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated)
generate_sharded_instantiations(
INSTANCES_NAME device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances
TEMPLATE_FILE xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instance.in
NUM_SHARDS 10
SRC_LIST GROUPED_CONV3D_FWD
OUTPUT_DIR ${GENERATED_DIR}/xdl/mem
)
generate_sharded_instantiations(
INSTANCES_NAME device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances
TEMPLATE_FILE xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instance.in
NUM_SHARDS 10
SRC_LIST GROUPED_CONV3D_FWD
OUTPUT_DIR ${GENERATED_DIR}/xdl/mem
)
generate_sharded_instantiations(
INSTANCES_NAME device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances
TEMPLATE_FILE xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instance.in
NUM_SHARDS 10
SRC_LIST GROUPED_CONV3D_FWD
OUTPUT_DIR ${GENERATED_DIR}/xdl/mem
)
generate_sharded_instantiations(
INSTANCES_NAME device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances
TEMPLATE_FILE xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instance.in
NUM_SHARDS 10
SRC_LIST GROUPED_CONV3D_FWD
OUTPUT_DIR ${GENERATED_DIR}/xdl/mem
)
generate_sharded_instantiations(
INSTANCES_NAME device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances
TEMPLATE_FILE xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instance.in
NUM_SHARDS 10
SRC_LIST GROUPED_CONV3D_FWD
OUTPUT_DIR ${GENERATED_DIR}/xdl/mem
)
generate_sharded_instantiations(
INSTANCES_NAME device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances
TEMPLATE_FILE xdl/mem/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instance.in
NUM_SHARDS 10
SRC_LIST GROUPED_CONV3D_FWD
OUTPUT_DIR ${GENERATED_DIR}/xdl/mem
)
generate_sharded_instantiations(
INSTANCES_NAME device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances
TEMPLATE_FILE xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.in
NUM_SHARDS 12
SRC_LIST GROUPED_CONV3D_FWD
OUTPUT_DIR ${GENERATED_DIR}/xdl/comp
)
generate_sharded_instantiations(
INSTANCES_NAME device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances
TEMPLATE_FILE xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instance.in
NUM_SHARDS 12
SRC_LIST GROUPED_CONV3D_FWD
OUTPUT_DIR ${GENERATED_DIR}/xdl/comp
)
generate_sharded_instantiations(
INSTANCES_NAME device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances
TEMPLATE_FILE xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instance.in
NUM_SHARDS 12
SRC_LIST GROUPED_CONV3D_FWD
OUTPUT_DIR ${GENERATED_DIR}/xdl/comp
)
generate_sharded_instantiations(
INSTANCES_NAME device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances
TEMPLATE_FILE xdl/comp/device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instance.in
NUM_SHARDS 12
SRC_LIST GROUPED_CONV3D_FWD
OUTPUT_DIR ${GENERATED_DIR}/xdl/comp
)
if((DTYPES MATCHES "fp8" AND DTYPES MATCHES "fp16") OR NOT DEFINED DTYPES)
list(APPEND GROUPED_CONV3D_FWD

View File

@@ -0,0 +1,111 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_comp_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/host_utility/device_prop.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
BF16,
BF16,
Empty_Tuple,
BF16,
PassThrough,
PassThrough,
PassThrough>>>& instances)
{
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwdDefault>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwd1x1P0>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwd1x1S1P0>{});
if(ck::get_device_name() != "gfx950")
{
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances_part2<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwdDefault>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances_part2<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwd1x1P0>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances_part2<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwd1x1S1P0>{});
}
if(ck::get_device_name() == "gfx950")
{
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances_2x<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwdDefault>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances_2x<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwd1x1P0>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances_2x<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwd1x1S1P0>{});
}
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -1,66 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_comp_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/utility/filter_tuple.hpp"
namespace ck::tensor_operation::device::instance {
using device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances =
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
BF16,
BF16,
Empty_Tuple,
BF16,
PassThrough,
PassThrough,
PassThrough>>>;
template <int Shards, int ShardIndex>
void add_device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard(
device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances& instances)
{
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_bf16_comp_instances<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwdDefault>,
Shards,
ShardIndex>{});
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_bf16_comp_instances<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwd1x1P0>,
Shards,
ShardIndex>{});
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_bf16_comp_instances<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwd1x1S1P0>,
Shards,
ShardIndex>{});
}
} // namespace ck::tensor_operation::device::instance

View File

@@ -0,0 +1,111 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_comp_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/host_utility/device_prop.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
F16,
F16,
Empty_Tuple,
F16,
PassThrough,
PassThrough,
PassThrough>>>& instances)
{
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_f16_comp_instances<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwdDefault>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_f16_comp_instances<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwd1x1P0>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_f16_comp_instances<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwd1x1S1P0>{});
if(ck::get_device_name() != "gfx950")
{
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_f16_comp_instances_part2<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwdDefault>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_f16_comp_instances_part2<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwd1x1P0>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_f16_comp_instances_part2<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwd1x1S1P0>{});
}
if(ck::get_device_name() == "gfx950")
{
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_f16_comp_instances_2x<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwdDefault>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_f16_comp_instances_2x<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwd1x1P0>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_f16_comp_instances_2x<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwd1x1S1P0>{});
}
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -1,65 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_comp_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/utility/filter_tuple.hpp"
namespace ck::tensor_operation::device::instance {
using device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances =
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
F16,
F16,
Empty_Tuple,
F16,
PassThrough,
PassThrough,
PassThrough>>>;
template <int Shards, int ShardIndex>
void add_device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard(
device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances& instances)
{
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f16_comp_instances<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwdDefault>,
Shards,
ShardIndex>{});
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<device_grouped_conv_fwd_xdl_f16_comp_instances<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwd1x1P0>,
Shards,
ShardIndex>{});
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f16_comp_instances<3,
NDHWGC,
GKZYXC,
Empty_Tuple,
NDHWGK,
ConvFwd1x1S1P0>,
Shards,
ShardIndex>{});
}
} // namespace ck::tensor_operation::device::instance

View File

@@ -0,0 +1,54 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_comp_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
BF16,
BF16,
Empty_Tuple,
BF16,
PassThrough,
PassThrough,
PassThrough>>>& instances)
{
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_bf16_comp_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -1,65 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_comp_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/utility/filter_tuple.hpp"
namespace ck::tensor_operation::device::instance {
using device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances =
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
BF16,
BF16,
Empty_Tuple,
BF16,
PassThrough,
PassThrough,
PassThrough>>>;
template <int Shards, int ShardIndex>
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances& instances)
{
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_bf16_comp_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault>,
Shards,
ShardIndex>{});
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_bf16_comp_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0>,
Shards,
ShardIndex>{});
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_bf16_comp_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0>,
Shards,
ShardIndex>{});
}
} // namespace ck::tensor_operation::device::instance

View File

@@ -0,0 +1,54 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_comp_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
F16,
F16,
Empty_Tuple,
F16,
PassThrough,
PassThrough,
PassThrough>>>& instances)
{
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_f16_comp_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_f16_comp_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0>{});
add_device_operation_instances(
instances,
device_grouped_conv_fwd_xdl_f16_comp_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -1,63 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_comp_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/utility/filter_tuple.hpp"
namespace ck::tensor_operation::device::instance {
using device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances =
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
F16,
F16,
Empty_Tuple,
F16,
PassThrough,
PassThrough,
PassThrough>>>;
template <int Shards, int ShardIndex>
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances_shard(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_comp_instances& instances)
{
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f16_comp_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault>,
Shards,
ShardIndex>{});
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<device_grouped_conv_fwd_xdl_f16_comp_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0>,
Shards,
ShardIndex>{});
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f16_comp_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0>,
Shards,
ShardIndex>{});
}
} // namespace ck::tensor_operation::device::instance

View File

@@ -0,0 +1,53 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
BF16,
BF16,
Empty_Tuple,
BF16,
PassThrough,
PassThrough,
PassThrough>>>& instances)
{
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -0,0 +1,53 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
F16,
F16,
Empty_Tuple,
F16,
PassThrough,
PassThrough,
PassThrough>>>& instances)
{
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_f16_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_f16_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_f16_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -1,9 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instance.inc"
namespace ck::tensor_operation::device::instance {
template void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_sharded<8, 0>(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances& instances);
} // namespace ck::tensor_operation::device::instance

View File

@@ -1,9 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instance.inc"
namespace ck::tensor_operation::device::instance {
template void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_sharded<8, 1>(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances& instances);
} // namespace ck::tensor_operation::device::instance

View File

@@ -1,9 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instance.inc"
namespace ck::tensor_operation::device::instance {
template void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_sharded<8, 2>(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances& instances);
} // namespace ck::tensor_operation::device::instance

View File

@@ -1,9 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instance.inc"
namespace ck::tensor_operation::device::instance {
template void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_sharded<8, 3>(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances& instances);
} // namespace ck::tensor_operation::device::instance

View File

@@ -1,9 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instance.inc"
namespace ck::tensor_operation::device::instance {
template void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_sharded<8, 4>(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances& instances);
} // namespace ck::tensor_operation::device::instance

View File

@@ -1,9 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instance.inc"
namespace ck::tensor_operation::device::instance {
template void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_sharded<8, 5>(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances& instances);
} // namespace ck::tensor_operation::device::instance

View File

@@ -1,9 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instance.inc"
namespace ck::tensor_operation::device::instance {
template void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_sharded<8, 6>(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances& instances);
} // namespace ck::tensor_operation::device::instance

View File

@@ -1,9 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instance.inc"
namespace ck::tensor_operation::device::instance {
template void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_sharded<8, 7>(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances& instances);
} // namespace ck::tensor_operation::device::instance

View File

@@ -1,14 +1,15 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp"
#include "ck/utility/filter_tuple.hpp"
namespace ck::tensor_operation::device::instance {
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
using device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances =
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NGCDHW,
GKCZYX,
@@ -20,43 +21,35 @@ using device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances =
BF16,
PassThrough,
PassThrough,
PassThrough>>>;
template <int Shards, int ShardIndex>
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances_shard(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_instances& instances)
PassThrough>>>& instances)
{
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<device_grouped_conv_fwd_xdl_bf16_instances<3,
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault>,
Shards,
ShardIndex>{});
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<device_grouped_conv_fwd_xdl_bf16_instances<3,
ConvFwdDefault,
Interwave>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0>,
Shards,
ShardIndex>{});
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<device_grouped_conv_fwd_xdl_bf16_instances<3,
ConvFwd1x1P0,
Interwave>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0>,
Shards,
ShardIndex>{});
ConvFwd1x1S1P0,
Interwave>{});
}
} // namespace ck::tensor_operation::device::instance
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -1,64 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/utility/filter_tuple.hpp"
namespace ck::tensor_operation::device::instance {
using device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances =
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
BF16,
BF16,
Empty_Tuple,
BF16,
PassThrough,
PassThrough,
PassThrough>>>;
template <int Shards, int ShardIndex>
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances& instances)
{
add_device_operation_instances(
instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault,
Interwave>,
Shards,
ShardIndex>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0,
Interwave>,
Shards,
ShardIndex>{});
add_device_operation_instances(
instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0,
Interwave>,
Shards,
ShardIndex>{});
}
} // namespace ck::tensor_operation::device::instance

View File

@@ -0,0 +1,55 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
BF16,
BF16,
Empty_Tuple,
BF16,
PassThrough,
PassThrough,
PassThrough>>>& instances)
{
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault,
Intrawave>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0,
Intrawave>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0,
Intrawave>{});
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -1,65 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/utility/filter_tuple.hpp"
namespace ck::tensor_operation::device::instance {
using device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances =
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
BF16,
BF16,
Empty_Tuple,
BF16,
PassThrough,
PassThrough,
PassThrough>>>;
template <int Shards, int ShardIndex>
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances& instances)
{
add_device_operation_instances(
instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault,
Intrawave>,
Shards,
ShardIndex>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0,
Intrawave>,
Shards,
ShardIndex>{});
add_device_operation_instances(
instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_bf16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0,
Intrawave>,
Shards,
ShardIndex>{});
}
} // namespace ck::tensor_operation::device::instance

View File

@@ -1,14 +1,15 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp"
#include "ck/utility/filter_tuple.hpp"
namespace ck::tensor_operation::device::instance {
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
using device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances =
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NGCDHW,
GKCZYX,
@@ -20,43 +21,35 @@ using device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances =
F16,
PassThrough,
PassThrough,
PassThrough>>>;
template <int Shards, int ShardIndex>
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances_shard(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_instances& instances)
PassThrough>>>& instances)
{
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<device_grouped_conv_fwd_xdl_f16_instances<3,
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_f16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault>,
Shards,
ShardIndex>{});
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<device_grouped_conv_fwd_xdl_f16_instances<3,
ConvFwdDefault,
Interwave>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_f16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0>,
Shards,
ShardIndex>{});
add_device_operation_instances(
instances,
util::filter_tuple_by_modulo_t<device_grouped_conv_fwd_xdl_f16_instances<3,
ConvFwd1x1P0,
Interwave>{});
add_device_operation_instances(instances,
device_grouped_conv_fwd_xdl_f16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0>,
Shards,
ShardIndex>{});
ConvFwd1x1S1P0,
Interwave>{});
}
} // namespace ck::tensor_operation::device::instance
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -3,11 +3,13 @@
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/utility/filter_tuple.hpp"
namespace ck::tensor_operation::device::instance {
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
using device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances =
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NGCDHW,
GKCZYX,
@@ -19,44 +21,35 @@ using device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances
F16,
PassThrough,
PassThrough,
PassThrough>>>;
template <int Shards, int ShardIndex>
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances_shard(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances& instances)
PassThrough>>>& instances)
{
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault,
Interwave>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_f16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault,
Intrawave>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0,
Interwave>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_f16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0,
Intrawave>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0,
Interwave>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_f16_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0,
Intrawave>{});
}
} // namespace ck::tensor_operation::device::instance
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -3,11 +3,13 @@
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/utility/filter_tuple.hpp"
namespace ck::tensor_operation::device::instance {
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
using device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances =
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_inter_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NGCDHW,
GKCZYX,
@@ -19,44 +21,35 @@ using device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances
F32,
PassThrough,
PassThrough,
PassThrough>>>;
template <int Shards, int ShardIndex>
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances& instances)
PassThrough>>>& instances)
{
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f32_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault,
Intrawave>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_f32_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault,
Interwave>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f32_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0,
Intrawave>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_f32_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0,
Interwave>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f32_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0,
Intrawave>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_f32_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0,
Interwave>{});
}
} // namespace ck::tensor_operation::device::instance
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck

View File

@@ -3,11 +3,13 @@
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_mem_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/utility/filter_tuple.hpp"
namespace ck::tensor_operation::device::instance {
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
using device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances =
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances(
std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
NGCDHW,
GKCZYX,
@@ -19,44 +21,35 @@ using device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances
F32,
PassThrough,
PassThrough,
PassThrough>>>;
template <int Shards, int ShardIndex>
void add_device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances_shard(
device_grouped_conv3d_fwd_xdl_ngcdhw_gkczyx_ngkdhw_f32_mem_intra_instances& instances)
PassThrough>>>& instances)
{
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f32_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault,
Intrawave>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_f32_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwdDefault,
Intrawave>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f32_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0,
Intrawave>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_f32_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1P0,
Intrawave>{});
add_device_operation_instances(instances,
ck::util::filter_tuple_by_modulo_t<
device_grouped_conv_fwd_xdl_f32_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0,
Intrawave>,
Shards,
ShardIndex>{});
device_grouped_conv_fwd_xdl_f32_mem_instances<3,
NGCDHW,
GKCZYX,
Empty_Tuple,
NGKDHW,
ConvFwd1x1S1P0,
Intrawave>{});
}
} // namespace ck::tensor_operation::device::instance
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck