diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_instance.hpp index 420908444d..bf46c02c75 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_instance.hpp +++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_instance.hpp @@ -13,10 +13,7 @@ namespace tensor_operation { namespace device { namespace instance { -using BF16 = ck::bhalf_t; -using F16 = ck::half_t; -using F32 = float; -using I8 = int8_t; +using I8 = int8_t; template using S = ck::Sequence; @@ -36,35 +33,6 @@ static constexpr auto ConvFwd3x3 = ConvolutionForwardSpecialization::Filter3x3; static constexpr auto GemmMNKPadding = GemmSpecialization::MNKPadding; -template , - typename OutElementOp = PassThrough> -using device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_f16_instances = std::tuple< - // clang-format off - //########################################| NumDim| A| B| Ds| E| AData| BData| AccData| CShuffle| Ds| EData| A| B| CDE| ConvForward| GEMM| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MWmma| NWmma| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer| Pipeline scheduler | Pipeline version | AComp | BComp | Merge | - //########################################| Spatial| Layout| Layout| Layout| Layout| Type| Type| Type| DataType| DataType| Type| Elementwise| Elementwise| Elementwise| Specialization| Specialization| Size| Block| Block| Block| | | WMMA| WMMA| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MWmmaPerWave| NWmmaPerWave| _MBlock_MWaveMPerWmma| ScalarPerVector| | | Type | Type | Groups | - //########################################| | | | | | | | | | | | Operation| Operation| Operation| | | | | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerShuffle| PerShuffle| _NBlock_NWaveNPerWmma| _NWaveNPerWmma| | | | | | - //########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | - // Instances with NumGroupsPerBatch > 1 - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<0, 2, 1>, S<0, 2, 1>, 1, 4, 4, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 1, 4, 1, 1, 1, S<1, 16, 1, 4>, 1, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v1, F16, F16, 8> -#ifndef ONE_INSTANCE_PER_LIST - , - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<0, 2, 1>, S<0, 2, 1>, 1, 4, 4, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 1, 4, 1, 1, 1, S<1, 16, 1, 4>, 1, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v1, F16, F16, 16>, - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<0, 2, 1>, S<0, 2, 1>, 1, 4, 4, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 1, 4, 1, 1, 1, S<1, 16, 1, 4>, 1, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v1, F16, F16, 32>, - // "2x" instances - // TODO: I had to double BK1 from 4 to 8 for these instances to make them give correct results. Figure out why. - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<0, 2, 1>, S<0, 2, 1>, 1, 4, 4, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 1, 4, 1, 1, 1, S<1, 16, 1, 4>, 1, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v1, F16, F16, 8>, - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<0, 2, 1>, S<0, 2, 1>, 1, 4, 4, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 1, 4, 1, 1, 1, S<1, 16, 1, 4>, 1, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v1, F16, F16, 16>, - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<0, 2, 1>, S<0, 2, 1>, 1, 4, 4, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 1, 4, 1, 1, 1, S<1, 16, 1, 4>, 1, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v1, F16, F16, 32> -#endif - // clang-format on - >; - template && is_same_v && is_same_v) { - add_device_grouped_conv2d_fwd_wmma_cshufflev3_merged_groups_ngchw_gkcyx_ngkhw_f16_instances( - op_ptrs); add_device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_f16_instances( op_ptrs); add_device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_f16_16x16_instances( @@ -1055,8 +1051,6 @@ struct DeviceOperationInstanceFactory && is_same_v && is_same_v) { - add_device_grouped_conv3d_fwd_wmma_cshufflev3_merged_groups_ngcdhw_gkczyx_ngkdhw_f16_instances( - op_ptrs); add_device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_f16_instances( op_ptrs); add_device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_f16_16x16_instances( diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_bias_clamp.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_bias_clamp.hpp index 37e3b46617..72dd49cea3 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_bias_clamp.hpp +++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_bias_clamp.hpp @@ -300,8 +300,6 @@ struct DeviceOperationInstanceFactory>>& instances); -void add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances( - std::vector, - NHWGK, - F16, - F16, - Tuple, - F16, - PassThrough, - PassThrough, - AddClamp>>>& instances); - void add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_comp_instances( std::vector>>& instances); -void add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances( - std::vector, - NDHWGK, - F16, - F16, - Tuple, - F16, - PassThrough, - PassThrough, - AddClamp>>>& instances); - void add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances( std::vector>>& instances); -void add_device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances( - std::vector, - NHWGK, - F16, - F16, - Tuple<>, - F16, - PassThrough, - PassThrough, - Clamp>>>& instances); - void add_device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_comp_instances( std::vector>>& instances); -void add_device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances( - std::vector, - NDHWGK, - F16, - F16, - Tuple<>, - F16, - PassThrough, - PassThrough, - Clamp>>>& instances); - void add_device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances( std::vector>>& instances); - -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_merged_groups_ngchw_gkcyx_ngkhw_f16_instances( - std::vector>>& instances); -#endif - #ifdef CK_ENABLE_INT8 void add_device_grouped_conv2d_fwd_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_int8_instances( std::vector>>& instances); #endif -#ifdef CK_ENABLE_FP16 -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances( - std::vector>>& instances); -#endif - -// grouped conv3d forward, NGCDHW/GKCZYX/NGKDHW -#ifdef CK_ENABLE_FP16 -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_merged_groups_ngcdhw_gkczyx_ngkdhw_f16_instances( - std::vector>>& instances); -#endif - } // namespace instance } // namespace device } // namespace tensor_operation diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt index b5cb68990d..441b2e8c30 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt @@ -128,10 +128,8 @@ set(GROUPED_CONV2D_FWD # merged groups # NHWGC, GKYXC, NHWGK - wmma/merged_groups/device_grouped_conv2d_fwd_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_f16_instance.cpp wmma/merged_groups/device_grouped_conv2d_fwd_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_int8_instance.cpp # NGCHW, GKCYX, NGKHW - wmma/merged_groups/device_grouped_conv2d_fwd_wmma_cshufflev3_merged_groups_ngchw_gkcyx_ngkhw_f16_instance.cpp #mem # NHWGC, GKYXC, NHWGK intra diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/merged_groups/device_grouped_conv2d_fwd_wmma_cshufflev3_merged_groups_ngchw_gkcyx_ngkhw_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/merged_groups/device_grouped_conv2d_fwd_wmma_cshufflev3_merged_groups_ngchw_gkcyx_ngkhw_f16_instance.cpp deleted file mode 100644 index edd109c4da..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/merged_groups/device_grouped_conv2d_fwd_wmma_cshufflev3_merged_groups_ngchw_gkcyx_ngkhw_f16_instance.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_merged_groups_ngchw_gkcyx_ngkhw_f16_instances( - std::vector>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_f16_instances<2, - NGCHW, - GKCYX, - Empty_Tuple, - NGKHW, - ConvFwdDefault>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_f16_instances<2, - NGCHW, - GKCYX, - Empty_Tuple, - NGKHW, - ConvFwd3x3>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/merged_groups/device_grouped_conv2d_fwd_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/merged_groups/device_grouped_conv2d_fwd_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_f16_instance.cpp deleted file mode 100644 index d6da0f4915..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/merged_groups/device_grouped_conv2d_fwd_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_f16_instance.cpp +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_instance.hpp" -#include "ck/host_utility/device_prop.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances( - std::vector>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_f16_instances<2, - NHWGC, - GKYXC, - Empty_Tuple, - NHWGK, - ConvFwdDefault>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_f16_instances<2, - NHWGC, - GKYXC, - Empty_Tuple, - NHWGK, - ConvFwd3x3>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeLists.txt index 2ce6c2d032..ac9c72d382 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeLists.txt @@ -42,7 +42,6 @@ add_instance_library(device_grouped_conv2d_fwd_bias_clamp_instance wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_fp16_instance.cpp wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_fp16_16x16_instance.cpp - wmma/merged_groups/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp wmma/mem/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_fp16_mem_intra_instance.cpp wmma/mem/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_fp16_mem_inter_instance.cpp wmma/comp/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_fp16_comp_instance.cpp diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/merged_groups/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/merged_groups/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp deleted file mode 100644 index 0d98791036..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/merged_groups/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_instance.hpp" -#include "ck/host_utility/device_prop.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances( - std::vector, - NHWGK, - F16, - F16, - Tuple, - F16, - PassThrough, - PassThrough, - AddClamp>>>& instances) -{ - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_f16_instances<2, - NHWGC, - GKYXC, - Tuple, - NHWGK, - ConvFwdDefault, - Tuple, - AddClamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_f16_instances<2, - NHWGC, - GKYXC, - Tuple, - NHWGK, - ConvFwd3x3, - Tuple, - AddClamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeLists.txt index f06ef604dd..6ac9c4aa09 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeLists.txt @@ -42,7 +42,6 @@ add_instance_library(device_grouped_conv2d_fwd_clamp_instance wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_fp16_instance.cpp wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_fp16_16x16_instance.cpp - wmma/merged_groups/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp wmma/mem/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_fp16_mem_intra_instance.cpp wmma/mem/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_fp16_mem_inter_instance.cpp wmma/comp/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_fp16_comp_instance.cpp diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/merged_groups/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/merged_groups/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp deleted file mode 100644 index 86bae39d88..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/merged_groups/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_instance.hpp" -#include "ck/host_utility/device_prop.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances( - std::vector, - NHWGK, - F16, - F16, - Tuple<>, - F16, - PassThrough, - PassThrough, - Clamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_f16_instances<2, - NHWGC, - GKYXC, - Tuple<>, - NHWGK, - ConvFwdDefault, - Tuple<>, - Clamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_f16_instances<2, - NHWGC, - GKYXC, - Tuple<>, - NHWGK, - ConvFwd3x3, - Tuple<>, - Clamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt index f35cafb44e..9f38245ee3 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt @@ -77,9 +77,6 @@ set(GROUPED_CONV3D_FWD wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_f16_16x16_instance.cpp - wmma/merged_groups/device_grouped_conv3d_fwd_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp - wmma/merged_groups/device_grouped_conv3d_fwd_wmma_cshufflev3_merged_groups_ngcdhw_gkczyx_ngkdhw_f16_instance.cpp - wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instance.cpp wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instance.cpp diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/merged_groups/device_grouped_conv3d_fwd_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/merged_groups/device_grouped_conv3d_fwd_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp deleted file mode 100644 index 92dae0a8f1..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/merged_groups/device_grouped_conv3d_fwd_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { - -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances( - std::vector>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_f16_instances<3, - NDHWGC, - GKZYXC, - Empty_Tuple, - NDHWGK, - ConvFwdDefault>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_f16_instances<3, - NDHWGC, - GKZYXC, - Empty_Tuple, - NDHWGK, - ConvFwd3x3>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/merged_groups/device_grouped_conv3d_fwd_wmma_cshufflev3_merged_groups_ngcdhw_gkczyx_ngkdhw_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/merged_groups/device_grouped_conv3d_fwd_wmma_cshufflev3_merged_groups_ngcdhw_gkczyx_ngkdhw_f16_instance.cpp deleted file mode 100644 index bf26fe02c7..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/merged_groups/device_grouped_conv3d_fwd_wmma_cshufflev3_merged_groups_ngcdhw_gkczyx_ngkdhw_f16_instance.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { - -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_merged_groups_ngcdhw_gkczyx_ngkdhw_f16_instances( - std::vector>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_f16_instances<3, - NGCDHW, - GKCZYX, - Empty_Tuple, - NGKDHW, - ConvFwdDefault>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_f16_instances<3, - NGCDHW, - GKCZYX, - Empty_Tuple, - NGKDHW, - ConvFwd3x3>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeLists.txt index 5056f1a6f5..ec5f8193a3 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeLists.txt @@ -37,7 +37,6 @@ set(GROUPED_CONV3D_FWD wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_fp16_16x16_instance.cpp - wmma/merged_groups/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp wmma/mem/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_fp16_mem_inter_instance.cpp wmma/mem/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_fp16_mem_intra_instance.cpp wmma/comp/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_fp16_comp_instance.cpp diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/merged_groups/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/merged_groups/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp deleted file mode 100644 index c230756bf0..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/merged_groups/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { - -void add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances( - std::vector, - NDHWGK, - F16, - F16, - Tuple, - F16, - PassThrough, - PassThrough, - AddClamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_f16_instances<3, - NDHWGC, - GKZYXC, - Tuple, - NDHWGK, - ConvFwdDefault, - Tuple, - AddClamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_f16_instances<3, - NDHWGC, - GKZYXC, - Tuple, - NDHWGK, - ConvFwd3x3, - Tuple, - AddClamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeLists.txt index 6c2ff6cd6a..7ed8f3f3a1 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeLists.txt @@ -37,7 +37,6 @@ set(GROUPED_CONV3D_FWD wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_fp16_16x16_instance.cpp - wmma/merged_groups/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp wmma/mem/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_fp16_mem_inter_instance.cpp wmma/mem/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_fp16_mem_intra_instance.cpp wmma/comp/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_fp16_comp_instance.cpp diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/merged_groups/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/merged_groups/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp deleted file mode 100644 index 8084f11954..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/merged_groups/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { - -void add_device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances( - std::vector, - NDHWGK, - F16, - F16, - Tuple<>, - F16, - PassThrough, - PassThrough, - Clamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_f16_instances<3, - NDHWGC, - GKZYXC, - Tuple<>, - NDHWGK, - ConvFwdDefault, - Tuple<>, - Clamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_f16_instances<3, - NDHWGC, - GKZYXC, - Tuple<>, - NDHWGK, - ConvFwd3x3, - Tuple<>, - Clamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck