From 4fd168ea67f4be0a4d70182a956bcfb76aabfd90 Mon Sep 17 00:00:00 2001 From: Wojciech Laskowski Date: Tue, 2 Dec 2025 14:57:22 +0000 Subject: [PATCH] Re-factor of BF16 instance tuples --- ...conv_fwd_wmma_cshufflev3_comp_instance.hpp | 37 ------ ...uped_conv_fwd_wmma_cshufflev3_instance.hpp | 45 ++++--- ..._conv_fwd_wmma_cshufflev3_mem_instance.hpp | 26 ---- .../gpu/grouped_convolution_forward.hpp | 32 ----- ...grouped_convolution_forward_bias_clamp.hpp | 16 --- ...ion_forward_bias_clamp_wmma_cshufflev3.inc | 113 ------------------ .../gpu/grouped_convolution_forward_clamp.hpp | 16 --- ...volution_forward_clamp_wmma_cshufflev3.inc | 113 ------------------ ...nvolution_forward_comp_wmma_cshufflev3.inc | 65 ---------- ...tion_forward_mem_inter_wmma_cshufflev3.inc | 65 ---------- ...tion_forward_mem_intra_wmma_cshufflev3.inc | 65 ---------- ...ed_convolution_forward_wmma_cshufflev3.inc | 55 --------- .../gpu/grouped_conv2d_fwd/CMakeLists.txt | 15 --- ...v3_ngchw_gkcyx_ngkhw_bf16_comp_instance.in | 44 ------- ...3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp | 66 ---------- ...fflev3_gnhwc_gkyxc_gnhwk_bf16_instance.cpp | 48 ++++---- ..._ngchw_gkcyx_ngkhw_bf16_16x16_instance.cpp | 54 --------- ...fflev3_ngchw_gkyxc_ngkhw_bf16_instance.cpp | 36 +++++- ..._nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp | 56 --------- ...hw_gkcyx_ngkhw_bf16_mem_inter_instance.cpp | 39 ------ ...hw_gkcyx_ngkhw_bf16_mem_intra_instance.cpp | 39 ------ ...gc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp | 69 ----------- ...gc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp | 69 ----------- .../CMakeLists.txt | 4 - ...3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp | 63 ---------- ..._nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp | 62 ---------- ...gc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp | 65 ---------- ...gc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp | 65 ---------- .../grouped_conv2d_fwd_clamp/CMakeLists.txt | 4 - ...3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp | 63 ---------- ..._nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp | 62 ---------- ...gc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp | 65 ---------- ...gc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp | 65 ---------- .../gpu/grouped_conv3d_fwd/CMakeLists.txt | 33 +---- ...ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.in | 65 ---------- ...ngcdhw_gkczyx_ngkdhw_bf16_comp_instance.in | 64 ---------- ...ev3_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp | 36 +++--- ...hwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp | 55 --------- ...v3_ngcdhw_gkczyx_ngkdhw_bf16_instance.cpp} | 16 ++- ...lev3_ngcdhw_gkczyx_ngkdhw_bf16_instance.in | 6 +- ..._gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp | 58 --------- ..._gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp | 58 --------- ...w_gkczyx_ngkdhw_bf16_mem_inter_instance.in | 66 ---------- ...w_gkczyx_ngkdhw_bf16_mem_intra_instance.in | 66 ---------- .../CMakeLists.txt | 4 - ...dhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp | 62 ---------- ...hwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp | 61 ---------- ..._gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp | 64 ---------- ..._gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp | 64 ---------- .../grouped_conv3d_fwd_clamp/CMakeLists.txt | 4 - ...dhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp | 62 ---------- ...hwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp | 61 ---------- ..._gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp | 64 ---------- ..._gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp | 64 ---------- 54 files changed, 108 insertions(+), 2626 deletions(-) delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/comp/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_comp_instance.in delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/comp/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_16x16_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_mem_inter_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_mem_intra_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/comp/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/mem/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/mem/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/comp/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/mem/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/mem/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/comp/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.in delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/comp/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_comp_instance.in delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp rename library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/{device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_16x16_instance.cpp => device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_instance.cpp} (79%) delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instance.in delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instance.in delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/comp/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/mem/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/mem/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/comp/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/mem/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp delete mode 100644 library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/mem/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_comp_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_comp_instance.hpp index 044866dac2..7febebf94d 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_comp_instance.hpp +++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_comp_instance.hpp @@ -52,43 +52,6 @@ static constexpr auto GemmMNKPadding = GemmSpecialization::MNKPadding; static constexpr auto Intrawave = BlockGemmPipelineScheduler::Intrawave; static constexpr auto Interwave = BlockGemmPipelineScheduler::Interwave; -template , - typename OutElementOp = PassThrough> -using device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances = std::tuple< - // clang-format off - //########################################| NumDim| A| B| Ds| E| AData| BData| AccData| CShuffle| Ds| EData| A| B| CDE| ConvForward| GEMM| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MWmma| NWmma| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer| Pipeline scheduler | Pipeline version | - //########################################| Spatial| Layout| Layout| Layout| Layout| Type| Type| Type| DataType| DataType| Type| Elementwise| Elementwise| Elementwise| Specialization| Specialization| Size| Block| Block| Block| | | WMMA| WMMA| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MWmmaPerWave| NWmmaPerWave| _MBlock_MWaveMPerWmma| ScalarPerVector| | | - //########################################| | | | | | | | | | | | Operation| Operation| Operation| | | | | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerShuffle| PerShuffle| _NBlock_NWaveNPerWmma| _NWaveNPerWmma| | | - //########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | - // Compute friendly - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, 1, 1, S<1, 32, 1, 8>, 8, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v1> -#ifndef ONE_INSTANCE_PER_LIST - , - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, 1, 1, S<1, 32, 1, 8>, 8, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v3>, - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, 1, 1, S<1, 32, 1, 8>, 8, BlockGemmPipelineScheduler::Interwave, BlockGemmPipelineVersion::v1>, - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, 1, 1, S<1, 32, 1, 8>, 8, BlockGemmPipelineScheduler::Interwave, BlockGemmPipelineVersion::v1>, - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, 1, 1, S<1, 32, 1, 8>, 8, BlockGemmPipelineScheduler::Interwave, BlockGemmPipelineVersion::v1>, - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, 1, 1, S<1, 32, 1, 8>, 8, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v3>, - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, 1, 1, S<1, 32, 1, 8>, 8, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v3>, - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, 1, 1, S<1, 32, 1, 8>, 8, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v3>, - // "2x" instances - // DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1, 1, 1, S<1, 32, 1, 8>, 8, BlockGemmPipelineScheduler::Interwave, BlockGemmPipelineVersion::v1> // Assert broken - // "part 2" instances - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, 1, 2, S<1, 32, 1, 8>, 8, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v3>, - // AGPR Spill when use permuted lds layout. so, use padding for these two. - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, 1, 2, S<1, 32, 1, 8>, 8, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v3>, - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, 2, 1, S<1, 64, 1, 4>, 8, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v3>, - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, 1, 1, S<1, 32, 1, 8>, 8, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v3> -#endif - // clang-format on - >; - template , typename OutElementOp = PassThrough> -using device_grouped_conv_fwd_wmma_cshufflev3_bf16_generic_instances = std::tuple< +using device_grouped_conv_fwd_wmma_cshufflev3_bf16_ngchw_instances = std::tuple< + // clang-format off + //########################################| NumDim| A| B| Ds| E| AData| BData| AccData| CShuffle| Ds| EData| A| B| CDE| ConvForward| GEMM| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MWmma| NWmma| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer| Pipeline scheduler | Pipeline version | + //########################################| Spatial| Layout| Layout| Layout| Layout| Type| Type| Type| DataType| DataType| Type| Elementwise| Elementwise| Elementwise| Specialization| Specialization| Size| Block| Block| Block| | | WMMA| WMMA| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MWmmaPerWave| NWmmaPerWave| _MBlock_MWaveMPerWmma| ScalarPerVector| | | + //########################################| | | | | | | | | | | | Operation| Operation| Operation| | | | | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerShuffle| PerShuffle| _NBlock_NWaveNPerWmma| _NWaveNPerWmma| | | + //########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + // generic instance + DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 1, 8, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 1, 8, 1, 1, 1, S<1, 16, 1, 4>, 1, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v1> + // clang-format on + >; + +template , + typename OutElementOp = PassThrough> +using device_grouped_conv_fwd_wmma_cshufflev3_bf16_gnhwc_instances = std::tuple< // clang-format off //########################################| NumDim| A| B| Ds| E| AData| BData| AccData| CShuffle| Ds| EData| A| B| CDE| ConvForward| GEMM| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MWmma| NWmma| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer| Pipeline scheduler | Pipeline version | //########################################| Spatial| Layout| Layout| Layout| Layout| Type| Type| Type| DataType| DataType| Type| Elementwise| Elementwise| Elementwise| Specialization| Specialization| Size| Block| Block| Block| | | WMMA| WMMA| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MWmmaPerWave| NWmmaPerWave| _MBlock_MWaveMPerWmma| ScalarPerVector| | | @@ -126,30 +145,6 @@ using device_grouped_conv_fwd_wmma_cshufflev3_bf16_instances = std::tuple< // clang-format on >; -template , - typename OutElementOp = PassThrough> -using device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances = std::tuple< - // clang-format off - //########################################| NumDim| A| B| Ds| E| AData| BData| AccData| CShuffle| Ds| EData| A| B| CDE| ConvForward| GEMM| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MWmma| NWmma| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer| Pipeline scheduler | Pipeline version | - //########################################| Spatial| Layout| Layout| Layout| Layout| Type| Type| Type| DataType| DataType| Type| Elementwise| Elementwise| Elementwise| Specialization| Specialization| Size| Block| Block| Block| | | WMMA| WMMA| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MWmmaPerWave| NWmmaPerWave| _MBlock_MWaveMPerWmma| ScalarPerVector| | | - //########################################| | | | | | | | | | | | Operation| Operation| Operation| | | | | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerShuffle| PerShuffle| _NBlock_NWaveNPerWmma| _NWaveNPerWmma| | | - //########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 1, 8, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 2, 8, 1, 1, 1, S<1, 32, 1, 4>, 1, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v1> -#ifndef ONE_INSTANCE_PER_LIST - , - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 2, 8, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 1, 8, 1, 1, 1, S<1, 32, 1, 4>, 2, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v1>, - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 4, 8, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 4, 8, 1, 1, 1, S<1, 32, 1, 4>, 4, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v1>, - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1, 1, 1, S<1, 32, 1, 4>, 8, BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion::v1> -#endif - // clang-format on - >; - template , - typename OutElementOp = PassThrough> -using device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances = std::tuple< - // clang-format off - //########################################| NumDim| A| B| Ds| E| AData| BData| AccData| CShuffle| Ds| EData| A| B| CDE| ConvForward| GEMM| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MWmma| NWmma| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer| Pipeline scheduler | Pipeline version | - //########################################| Spatial| Layout| Layout| Layout| Layout| Type| Type| Type| DataType| DataType| Type| Elementwise| Elementwise| Elementwise| Specialization| Specialization| Size| Block| Block| Block| | | WMMA| WMMA| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MWmmaPerWave| NWmmaPerWave| _MBlock_MWaveMPerWmma| ScalarPerVector| | | - //########################################| | | | | | | | | | | | Operation| Operation| Operation| | | | | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerShuffle| PerShuffle| _NBlock_NWaveNPerWmma| _NWaveNPerWmma| | | - //########################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | - // Latency friendly - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, S<8, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, 1, 1, S<1, 16, 1, 8>, 2, BlkGemmPipeSched, BlockGemmPipelineVersion::v1> -#ifndef ONE_INSTANCE_PER_LIST - , - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, S<16, 4, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, 1, 1, S<1, 16, 1, 4>, 4, BlkGemmPipeSched, BlockGemmPipelineVersion::v1>, - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, S<8, 8, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, 1, 1, S<1, 16, 1, 4>, 4, BlkGemmPipeSched, BlockGemmPipelineVersion::v1>, - DeviceGroupedConvFwdMultipleABD_Wmma_CShuffle_V3, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, S<8, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0, 1, 1, S<1, 16, 1, 8>, 4, BlkGemmPipeSched, BlockGemmPipelineVersion::v1> -#endif - // clang-format on - >; - template >>& instances); -void add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instances( - std::vector, - NHWGK, - BF16, - BF16, - Tuple, - BF16, - PassThrough, - PassThrough, - AddClamp>>>& instances); - // void // add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances( // std::vector>>& instances); -void add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instances( - std::vector, - NHWGK, - BF16, - BF16, - Tuple, - BF16, - PassThrough, - PassThrough, - AddClamp>>>& instances); - -void add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances( - std::vector, - NHWGK, - BF16, - BF16, - Tuple, - BF16, - PassThrough, - PassThrough, - AddClamp>>>& instances); - -void add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances( - std::vector, - NHWGK, - BF16, - BF16, - Tuple, - BF16, - PassThrough, - PassThrough, - AddClamp>>>& instances); - void add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instances( std::vector>>& instances); -void add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances( - std::vector, - NDHWGK, - BF16, - BF16, - Tuple, - BF16, - PassThrough, - PassThrough, - AddClamp>>>& instances); - // void // add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances( // std::vector>>& instances); - -void add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances( - std::vector, - NDHWGK, - BF16, - BF16, - Tuple, - BF16, - PassThrough, - PassThrough, - AddClamp>>>& instances); - -void add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances( - std::vector, - NDHWGK, - BF16, - BF16, - Tuple, - BF16, - PassThrough, - PassThrough, - AddClamp>>>& instances); - -void add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances( - std::vector, - NDHWGK, - BF16, - BF16, - Tuple, - BF16, - PassThrough, - PassThrough, - AddClamp>>>& instances); - #endif #ifdef CK_ENABLE_FP16 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_clamp.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_clamp.hpp index 56738df10a..c4b54c1e50 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_clamp.hpp +++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_clamp.hpp @@ -281,18 +281,10 @@ struct DeviceOperationInstanceFactory>>& instances); -void add_device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instances( - std::vector, - NHWGK, - BF16, - BF16, - Tuple<>, - BF16, - PassThrough, - PassThrough, - Clamp>>>& instances); - // void // add_device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances( // std::vector>>& instances); -void add_device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instances( - std::vector, - NHWGK, - BF16, - BF16, - Tuple<>, - BF16, - PassThrough, - PassThrough, - Clamp>>>& instances); - -void add_device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances( - std::vector, - NHWGK, - BF16, - BF16, - Tuple<>, - BF16, - PassThrough, - PassThrough, - Clamp>>>& instances); - -void add_device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances( - std::vector, - NHWGK, - BF16, - BF16, - Tuple<>, - BF16, - PassThrough, - PassThrough, - Clamp>>>& instances); - void add_device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instances( std::vector>>& instances); -void add_device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances( - std::vector, - NDHWGK, - BF16, - BF16, - Tuple<>, - BF16, - PassThrough, - PassThrough, - Clamp>>>& instances); - // void // add_device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances( // std::vector>>& instances); - -void add_device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances( - std::vector, - NDHWGK, - BF16, - BF16, - Tuple<>, - BF16, - PassThrough, - PassThrough, - Clamp>>>& instances); - -void add_device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances( - std::vector, - NDHWGK, - BF16, - BF16, - Tuple<>, - BF16, - PassThrough, - PassThrough, - Clamp>>>& instances); - -void add_device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances( - std::vector, - NDHWGK, - BF16, - BF16, - Tuple<>, - BF16, - PassThrough, - PassThrough, - Clamp>>>& instances); - #endif #ifdef CK_ENABLE_FP16 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_comp_wmma_cshufflev3.inc b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_comp_wmma_cshufflev3.inc index 7381df67b3..1c510ebc93 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_comp_wmma_cshufflev3.inc +++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_comp_wmma_cshufflev3.inc @@ -9,22 +9,6 @@ namespace device { namespace instance { // grouped conv2d forward, NHWGC/GKYXC/NHWGK -#ifdef CK_ENABLE_BF16 -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instances( - std::vector>>& instances); -#endif - #ifdef CK_ENABLE_FP16 void add_device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_comp_instances( std::vector>>& instances); #endif // CK_ENABLE_FP16 -#ifdef CK_ENABLE_BF16 -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_comp_instances( - std::vector>>& instances); -#endif - -#ifdef CK_ENABLE_BF16 -// grouped conv3d forward, NDHWGC/GKZYXC/NDHWGK -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances( - std::vector>>& instances); -#endif - #ifdef CK_ENABLE_FP16 void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances( std::vector>>& instances); #endif -#ifdef CK_ENABLE_BF16 -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances( - std::vector>>& instances); -#endif - } // namespace instance } // namespace device } // namespace tensor_operation diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_mem_inter_wmma_cshufflev3.inc b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_mem_inter_wmma_cshufflev3.inc index f2f266ee98..4e2ab52235 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_mem_inter_wmma_cshufflev3.inc +++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_mem_inter_wmma_cshufflev3.inc @@ -9,22 +9,6 @@ namespace device { namespace instance { // grouped conv2d forward, NHWGC/GKYXC/NHWGK -#ifdef CK_ENABLE_BF16 -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances( - std::vector>>& instances); -#endif - #ifdef CK_ENABLE_FP16 void add_device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances( std::vector>>& instances); #endif -#ifdef CK_ENABLE_BF16 -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_mem_inter_instances( - std::vector>>& instances); -#endif - -#ifdef CK_ENABLE_BF16 -// grouped conv3d forward, NDHWGC/GKZYXC/NDHWGK -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances( - std::vector>>& instances); -#endif - #ifdef CK_ENABLE_FP16 void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances( std::vector>>& instances); #endif -#ifdef CK_ENABLE_BF16 -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances( - std::vector>>& instances); -#endif - } // namespace instance } // namespace device } // namespace tensor_operation diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_mem_intra_wmma_cshufflev3.inc b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_mem_intra_wmma_cshufflev3.inc index db9162c96c..3edc47844a 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_mem_intra_wmma_cshufflev3.inc +++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_mem_intra_wmma_cshufflev3.inc @@ -9,22 +9,6 @@ namespace device { namespace instance { // grouped conv2d forward, NHWGC/GKYXC/NHWGK -#ifdef CK_ENABLE_BF16 -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances( - std::vector>>& instances); -#endif - #ifdef CK_ENABLE_FP16 void add_device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances( std::vector>>& instances); #endif -#ifdef CK_ENABLE_BF16 -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_mem_intra_instances( - std::vector>>& instances); -#endif - -#ifdef CK_ENABLE_BF16 -// grouped conv3d forward, NDHWGC/GKZYXC/NDHWGK -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances( - std::vector>>& instances); -#endif - #ifdef CK_ENABLE_FP16 void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances( std::vector>>& instances); #endif -#ifdef CK_ENABLE_BF16 -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances( - std::vector>>& instances); -#endif - } // namespace instance } // namespace device } // namespace tensor_operation diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_wmma_cshufflev3.inc b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_wmma_cshufflev3.inc index f87e44ee87..455028a103 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_wmma_cshufflev3.inc +++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_wmma_cshufflev3.inc @@ -105,20 +105,6 @@ void add_device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instan PassThrough, PassThrough, PassThrough>>>& instances); - -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instances( - std::vector>>& instances); #endif #ifdef CK_ENABLE_FP16 @@ -261,20 +247,6 @@ void add_device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_instan PassThrough, PassThrough, PassThrough>>>& instances); - -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_16x16_instances( - std::vector>>& instances); #endif #ifdef CK_ENABLE_BF16 @@ -342,20 +314,6 @@ void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_ins PassThrough, PassThrough, PassThrough>>>& instances); - -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances( - std::vector>>& instances); #endif #ifdef CK_ENABLE_FP16 @@ -534,19 +492,6 @@ void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_ins PassThrough, PassThrough, PassThrough>>>& instances); -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_16x16_instances( - std::vector>>& instances); #endif } // namespace instance diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt index cb8c0a229a..152c1aad8f 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt @@ -117,7 +117,6 @@ set(GROUPED_CONV2D_FWD # NHWGC, GKYXC, NHWGK wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance.cpp - wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_16x16_instance.cpp wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_int8_instance.cpp # NGCHW, GKYXC, NGKHW @@ -125,7 +124,6 @@ set(GROUPED_CONV2D_FWD wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkyxc_ngkhw_f16_instance.cpp wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkyxc_ngkhw_int8_instance.cpp # NGCHW, GKCYX, NGKHW - wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_16x16_instance.cpp wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_f16_16x16_instance.cpp # merged groups @@ -139,20 +137,15 @@ set(GROUPED_CONV2D_FWD #mem # NHWGC, GKYXC, NHWGK intra - wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_mem_intra_instance.cpp # NHWGC, GKYXC, NHWGK inter - wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_mem_inter_instance.cpp # NGCHW, GKCYX, NGKHW intra - wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_mem_intra_instance.cpp wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_f16_mem_intra_instance.cpp # NGCHW, GKCYX, NGKHW inter - wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_mem_inter_instance.cpp wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_f16_mem_inter_instance.cpp #comp # NHWGC, GKYXC, NHWGK - wmma/comp/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp wmma/comp/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_comp_instance.cpp wmma/comp/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_int8_comp_instance.cpp # NGCHW, GKCYX, NGKHW @@ -202,14 +195,6 @@ generate_sharded_instantiations( OUTPUT_DIR ${GENERATED_DIR}/xdl/comp ) set(GENERATED_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated) -generate_sharded_instantiations( - INSTANCES_NAME device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_comp_instances - TEMPLATE_FILE wmma/comp/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_comp_instance.in - NUM_SHARDS 16 - SRC_LIST GROUPED_CONV2D_FWD - OUTPUT_DIR ${GENERATED_DIR}/wmma/comp -) -set(GENERATED_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated) generate_sharded_instantiations( INSTANCES_NAME device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances TEMPLATE_FILE xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instance.in diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/comp/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_comp_instance.in b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/comp/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_comp_instance.in deleted file mode 100644 index 57eb2466a2..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/comp/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_comp_instance.in +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_comp_instance.hpp" -#include "ck/host_utility/device_prop.hpp" -#include "ck/utility/filter_tuple.hpp" - -namespace ck::tensor_operation::device::instance { - -using device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_comp_instances = - std::vector>>; - -// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k] -template -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_comp_instances_shard( - [[maybe_unused]] device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_comp_instances& - instances) -{ - add_device_operation_instances( - instances, - ck::util::filter_tuple_by_modulo_t< - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<2, - NGCHW, - GKCYX, - Empty_Tuple, - NGKHW, - ConvFwdDefault>, - Shards, - ShardIndex>{}); -} - -} // namespace ck::tensor_operation::device::instance \ No newline at end of file diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/comp/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/comp/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp deleted file mode 100644 index b56c9f196d..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/comp/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_comp_instance.hpp" -#include "ck/host_utility/device_prop.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instances( - std::vector>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<2, - NHWGC, - GKYXC, - Empty_Tuple, - NHWGK, - ConvFwdDefault>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<2, - NHWGC, - GKYXC, - Empty_Tuple, - NHWGK, - ConvFwd1x1P0>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<2, - NHWGC, - GKYXC, - Empty_Tuple, - NHWGK, - ConvFwd1x1S1P0>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<2, - NHWGC, - GKYXC, - Empty_Tuple, - NHWGK, - ConvFwdOddC>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_gnhwc_gkyxc_gnhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_gnhwc_gkyxc_gnhwk_bf16_instance.cpp index e5922cd27f..df9f8e8609 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_gnhwc_gkyxc_gnhwk_bf16_instance.cpp +++ b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_gnhwc_gkyxc_gnhwk_bf16_instance.cpp @@ -24,39 +24,39 @@ void add_device_grouped_conv2d_fwd_wmma_cshufflev3_gnhwc_gkyxc_gnhwk_bf16_instan { add_device_operation_instances( instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_instances<2, - GNHWC, - GKYXC, - Empty_Tuple, - GNHWK, - ConvFwdDefault>{}); + device_grouped_conv_fwd_wmma_cshufflev3_bf16_gnhwc_instances<2, + GNHWC, + GKYXC, + Empty_Tuple, + GNHWK, + ConvFwdDefault>{}); add_device_operation_instances( instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_instances<2, - GNHWC, - GKYXC, - Empty_Tuple, - GNHWK, - ConvFwd1x1P0>{}); + device_grouped_conv_fwd_wmma_cshufflev3_bf16_gnhwc_instances<2, + GNHWC, + GKYXC, + Empty_Tuple, + GNHWK, + ConvFwd1x1P0>{}); add_device_operation_instances( instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_instances<2, - GNHWC, - GKYXC, - Empty_Tuple, - GNHWK, - ConvFwd1x1S1P0>{}); + device_grouped_conv_fwd_wmma_cshufflev3_bf16_gnhwc_instances<2, + GNHWC, + GKYXC, + Empty_Tuple, + GNHWK, + ConvFwd1x1S1P0>{}); add_device_operation_instances( instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_instances<2, - GNHWC, - GKYXC, - Empty_Tuple, - GNHWK, - ConvFwdOddC>{}); + device_grouped_conv_fwd_wmma_cshufflev3_bf16_gnhwc_instances<2, + GNHWC, + GKYXC, + Empty_Tuple, + GNHWK, + ConvFwdOddC>{}); } } // namespace instance diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_16x16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_16x16_instance.cpp deleted file mode 100644 index 68b28e4c05..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_16x16_instance.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_16x16_instances( - std::vector>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<2, - NGCHW, - GKCYX, - Empty_Tuple, - NGKHW, - ConvFwdDefault>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<2, - NGCHW, - GKCYX, - Empty_Tuple, - NGKHW, - ConvFwd1x1P0>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<2, - NGCHW, - GKCYX, - Empty_Tuple, - NGKHW, - ConvFwd1x1S1P0>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkyxc_ngkhw_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkyxc_ngkhw_bf16_instance.cpp index 5ea8c0c8ab..c6cb5d5351 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkyxc_ngkhw_bf16_instance.cpp +++ b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkyxc_ngkhw_bf16_instance.cpp @@ -24,12 +24,36 @@ void add_device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkyxc_ngkhw_bf16_instan { add_device_operation_instances( instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_generic_instances<2, - NGCHW, - GKYXC, - Empty_Tuple, - NGKHW, - ConvFwdDefault>{}); + device_grouped_conv_fwd_wmma_cshufflev3_bf16_ngchw_instances<2, + NGCHW, + GKYXC, + Empty_Tuple, + NGKHW, + ConvFwdDefault>{}); + add_device_operation_instances( + instances, + device_grouped_conv_fwd_wmma_cshufflev3_bf16_ngchw_instances<2, + NGCHW, + GKYXC, + Empty_Tuple, + NGKHW, + ConvFwd1x1P0>{}); + add_device_operation_instances( + instances, + device_grouped_conv_fwd_wmma_cshufflev3_bf16_ngchw_instances<2, + NGCHW, + GKYXC, + Empty_Tuple, + NGKHW, + ConvFwd1x1S1P0>{}); + add_device_operation_instances( + instances, + device_grouped_conv_fwd_wmma_cshufflev3_bf16_ngchw_instances<2, + NGCHW, + GKYXC, + Empty_Tuple, + NGKHW, + ConvFwdOddC>{}); } } // namespace instance diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp deleted file mode 100644 index 5cae4c5548..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instances( - std::vector>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<2, - NHWGC, - GKYXC, - Empty_Tuple, - NHWGK, - ConvFwdDefault>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<2, - NHWGC, - GKYXC, - Empty_Tuple, - NHWGK, - ConvFwd1x1P0>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<2, - NHWGC, - GKYXC, - Empty_Tuple, - NHWGK, - ConvFwd1x1S1P0>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_mem_inter_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_mem_inter_instance.cpp deleted file mode 100644 index 4af95245b3..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_mem_inter_instance.cpp +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_mem_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_mem_inter_instances( - std::vector>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NGCHW, - GKCYX, - Empty_Tuple, - NGKHW, - ConvFwdDefault, - Interwave>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_mem_intra_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_mem_intra_instance.cpp deleted file mode 100644 index 0e73a7ca43..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_mem_intra_instance.cpp +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_mem_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_ngchw_gkcyx_ngkhw_bf16_mem_intra_instances( - std::vector>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NGCHW, - GKCYX, - Empty_Tuple, - NGKHW, - ConvFwdDefault, - Intrawave>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp deleted file mode 100644 index edee843cca..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_mem_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances( - std::vector>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Empty_Tuple, - NHWGK, - ConvFwdDefault, - Interwave>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Empty_Tuple, - NHWGK, - ConvFwd1x1P0, - Interwave>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Empty_Tuple, - NHWGK, - ConvFwd1x1S1P0, - Interwave>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Empty_Tuple, - NHWGK, - ConvFwdOddC, - Interwave>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp deleted file mode 100644 index 89d1058ad0..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/mem/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_mem_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances( - std::vector>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Empty_Tuple, - NHWGK, - ConvFwdDefault, - Intrawave>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Empty_Tuple, - NHWGK, - ConvFwd1x1P0, - Intrawave>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Empty_Tuple, - NHWGK, - ConvFwd1x1S1P0, - Intrawave>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Empty_Tuple, - NHWGK, - ConvFwdOddC, - Intrawave>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeLists.txt index 2db24c0c73..5b30f98453 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/CMakeLists.txt @@ -39,11 +39,7 @@ add_instance_library(device_grouped_conv2d_fwd_bias_clamp_instance # WMMA CSHUFFLE V3 wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp - wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp wmma/merged_groups/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp - wmma/mem/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp - wmma/mem/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp - wmma/comp/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_fp16_instance.cpp wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_fp16_16x16_instance.cpp diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/comp/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/comp/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp deleted file mode 100644 index 10b942d4d9..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/comp/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_comp_instance.hpp" -#include "ck/host_utility/device_prop.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instances( - std::vector, - NHWGK, - BF16, - BF16, - Tuple, - BF16, - PassThrough, - PassThrough, - AddClamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<2, - NHWGC, - GKYXC, - Tuple, - NHWGK, - ConvFwdDefault, - Tuple, - AddClamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<2, - NHWGC, - GKYXC, - Tuple, - NHWGK, - ConvFwd1x1P0, - Tuple, - AddClamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<2, - NHWGC, - GKYXC, - Tuple, - NHWGK, - ConvFwd1x1S1P0, - Tuple, - AddClamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp deleted file mode 100644 index 98354619cf..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instances( - std::vector, - NHWGK, - BF16, - BF16, - Tuple, - BF16, - PassThrough, - PassThrough, - AddClamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<2, - NHWGC, - GKYXC, - Tuple, - NHWGK, - ConvFwdDefault, - Tuple, - AddClamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<2, - NHWGC, - GKYXC, - Tuple, - NHWGK, - ConvFwd1x1P0, - Tuple, - AddClamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<2, - NHWGC, - GKYXC, - Tuple, - NHWGK, - ConvFwd1x1S1P0, - Tuple, - AddClamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/mem/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/mem/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp deleted file mode 100644 index 6646cd0c83..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/mem/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_mem_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances( - std::vector, - NHWGK, - BF16, - BF16, - Tuple, - BF16, - PassThrough, - PassThrough, - AddClamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Tuple, - NHWGK, - ConvFwdDefault, - Interwave, - Tuple, - AddClamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Tuple, - NHWGK, - ConvFwd1x1P0, - Interwave, - Tuple, - AddClamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Tuple, - NHWGK, - ConvFwd1x1S1P0, - Interwave, - Tuple, - AddClamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/mem/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/mem/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp deleted file mode 100644 index 096ada2922..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/wmma/mem/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_mem_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances( - std::vector, - NHWGK, - BF16, - BF16, - Tuple, - BF16, - PassThrough, - PassThrough, - AddClamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Tuple, - NHWGK, - ConvFwdDefault, - Intrawave, - Tuple, - AddClamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Tuple, - NHWGK, - ConvFwd1x1P0, - Intrawave, - Tuple, - AddClamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Tuple, - NHWGK, - ConvFwd1x1S1P0, - Intrawave, - Tuple, - AddClamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeLists.txt index 2ea55d0340..f5b23d2573 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/CMakeLists.txt @@ -39,11 +39,7 @@ add_instance_library(device_grouped_conv2d_fwd_clamp_instance # WMMA CSHUFFLE V3 wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp - wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp wmma/merged_groups/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp - wmma/mem/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp - wmma/mem/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp - wmma/comp/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_fp16_instance.cpp wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_fp16_16x16_instance.cpp diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/comp/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/comp/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp deleted file mode 100644 index 93a87c0d6c..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/comp/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_comp_instance.hpp" -#include "ck/host_utility/device_prop.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instances( - std::vector, - NHWGK, - BF16, - BF16, - Tuple<>, - BF16, - PassThrough, - PassThrough, - Clamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<2, - NHWGC, - GKYXC, - Tuple<>, - NHWGK, - ConvFwdDefault, - Tuple<>, - Clamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<2, - NHWGC, - GKYXC, - Tuple<>, - NHWGK, - ConvFwd1x1P0, - Tuple<>, - Clamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<2, - NHWGC, - GKYXC, - Tuple<>, - NHWGK, - ConvFwd1x1S1P0, - Tuple<>, - Clamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp deleted file mode 100644 index cd0a747383..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instances( - std::vector, - NHWGK, - BF16, - BF16, - Tuple<>, - BF16, - PassThrough, - PassThrough, - Clamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<2, - NHWGC, - GKYXC, - Tuple<>, - NHWGK, - ConvFwdDefault, - Tuple<>, - Clamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<2, - NHWGC, - GKYXC, - Tuple<>, - NHWGK, - ConvFwd1x1P0, - Tuple<>, - Clamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<2, - NHWGC, - GKYXC, - Tuple<>, - NHWGK, - ConvFwd1x1S1P0, - Tuple<>, - Clamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/mem/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/mem/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp deleted file mode 100644 index d67cf409cf..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/mem/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_mem_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances( - std::vector, - NHWGK, - BF16, - BF16, - Tuple<>, - BF16, - PassThrough, - PassThrough, - Clamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Tuple<>, - NHWGK, - ConvFwdDefault, - Interwave, - Tuple<>, - Clamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Tuple<>, - NHWGK, - ConvFwd1x1P0, - Interwave, - Tuple<>, - Clamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Tuple<>, - NHWGK, - ConvFwd1x1S1P0, - Interwave, - Tuple<>, - Clamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/mem/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/mem/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp deleted file mode 100644 index 03900bfd88..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/wmma/mem/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_mem_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { -void add_device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances( - std::vector, - NHWGK, - BF16, - BF16, - Tuple<>, - BF16, - PassThrough, - PassThrough, - Clamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Tuple<>, - NHWGK, - ConvFwdDefault, - Intrawave, - Tuple<>, - Clamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Tuple<>, - NHWGK, - ConvFwd1x1P0, - Intrawave, - Tuple<>, - Clamp>{}); - - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<2, - NHWGC, - GKYXC, - Tuple<>, - NHWGK, - ConvFwd1x1S1P0, - Intrawave, - Tuple<>, - Clamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt index 6f303c0693..e9271dc52b 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt @@ -73,21 +73,18 @@ set(GROUPED_CONV3D_FWD wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_gndhwc_gkzyxc_gndhwk_int8_instance.cpp wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp - wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instance.cpp wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp - wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_16x16_instance.cpp wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_f16_16x16_instance.cpp + wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_instance.cpp wmma/merged_groups/device_grouped_conv3d_fwd_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp wmma/merged_groups/device_grouped_conv3d_fwd_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp wmma/merged_groups/device_grouped_conv3d_fwd_wmma_cshufflev3_merged_groups_ngcdhw_gkczyx_ngkdhw_bf16_instance.cpp wmma/merged_groups/device_grouped_conv3d_fwd_wmma_cshufflev3_merged_groups_ngcdhw_gkczyx_ngkdhw_f16_instance.cpp - wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instance.cpp - wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instance.cpp ) # Add generated files for sharded instantiations. @@ -200,13 +197,6 @@ generate_sharded_instantiations( OUTPUT_DIR ${GENERATED_DIR}/wmma ) -generate_sharded_instantiations( - INSTANCES_NAME device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances - TEMPLATE_FILE wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instance.in - NUM_SHARDS 16 - SRC_LIST GROUPED_CONV3D_FWD - OUTPUT_DIR ${GENERATED_DIR}/wmma/mem -) generate_sharded_instantiations( INSTANCES_NAME device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instances TEMPLATE_FILE wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_f16_mem_inter_instance.in @@ -215,13 +205,6 @@ generate_sharded_instantiations( OUTPUT_DIR ${GENERATED_DIR}/wmma/mem ) -generate_sharded_instantiations( - INSTANCES_NAME device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances - TEMPLATE_FILE wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instance.in - NUM_SHARDS 16 - SRC_LIST GROUPED_CONV3D_FWD - OUTPUT_DIR ${GENERATED_DIR}/wmma/mem -) generate_sharded_instantiations( INSTANCES_NAME device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instances TEMPLATE_FILE wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_f16_mem_intra_instance.in @@ -230,13 +213,6 @@ generate_sharded_instantiations( OUTPUT_DIR ${GENERATED_DIR}/wmma/mem ) -generate_sharded_instantiations( - INSTANCES_NAME device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances - TEMPLATE_FILE wmma/comp/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.in - NUM_SHARDS 16 - SRC_LIST GROUPED_CONV3D_FWD - OUTPUT_DIR ${GENERATED_DIR}/wmma/comp -) generate_sharded_instantiations( INSTANCES_NAME device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances TEMPLATE_FILE wmma/comp/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_comp_instance.in @@ -244,13 +220,6 @@ generate_sharded_instantiations( SRC_LIST GROUPED_CONV3D_FWD OUTPUT_DIR ${GENERATED_DIR}/wmma/comp ) -generate_sharded_instantiations( - INSTANCES_NAME device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances - TEMPLATE_FILE wmma/comp/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_comp_instance.in - NUM_SHARDS 16 - SRC_LIST GROUPED_CONV3D_FWD - OUTPUT_DIR ${GENERATED_DIR}/wmma/comp -) generate_sharded_instantiations( INSTANCES_NAME device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_f16_comp_instances TEMPLATE_FILE wmma/comp/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_f16_comp_instance.in diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/comp/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.in b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/comp/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.in deleted file mode 100644 index 3246483de0..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/comp/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.in +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_comp_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/utility/filter_tuple.hpp" - -namespace ck::tensor_operation::device::instance { - -using device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances = - std::vector>>; - -template -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard( - device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances& instances) -{ - add_device_operation_instances( - instances, - util::filter_tuple_by_modulo_t< - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<3, - NDHWGC, - GKZYXC, - Empty_Tuple, - NDHWGK, - ConvFwdDefault>, - Shards, - ShardIndex>{}); - - add_device_operation_instances( - instances, - util::filter_tuple_by_modulo_t< - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<3, - NDHWGC, - GKZYXC, - Empty_Tuple, - NDHWGK, - ConvFwd1x1P0>, - Shards, - ShardIndex>{}); - - add_device_operation_instances( - instances, - util::filter_tuple_by_modulo_t< - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<3, - NDHWGC, - GKZYXC, - Empty_Tuple, - NDHWGK, - ConvFwd1x1S1P0>, - Shards, - ShardIndex>{}); -} - -} // namespace ck::tensor_operation::device::instance diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/comp/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_comp_instance.in b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/comp/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_comp_instance.in deleted file mode 100644 index f2e2f30fd9..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/comp/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_comp_instance.in +++ /dev/null @@ -1,64 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_comp_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/utility/filter_tuple.hpp" - -namespace ck::tensor_operation::device::instance { - -using device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances = - std::vector>>; -template -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances_shard( - device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_comp_instances& instances) -{ - add_device_operation_instances( - instances, - util::filter_tuple_by_modulo_t< - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<3, - NGCDHW, - GKCZYX, - Empty_Tuple, - NGKDHW, - ConvFwdDefault>, - Shards, - ShardIndex>{}); - - add_device_operation_instances( - instances, - util::filter_tuple_by_modulo_t< - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<3, - NGCDHW, - GKCZYX, - Empty_Tuple, - NGKDHW, - ConvFwd1x1P0>, - Shards, - ShardIndex>{}); - - add_device_operation_instances( - instances, - util::filter_tuple_by_modulo_t< - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<3, - NGCDHW, - GKCZYX, - Empty_Tuple, - NGKDHW, - ConvFwd1x1S1P0>, - Shards, - ShardIndex>{}); -} - -} // namespace ck::tensor_operation::device::instance diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp index 7bd0141cef..c812a11273 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp +++ b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp @@ -25,28 +25,28 @@ void add_device_grouped_conv3d_fwd_wmma_cshufflev3_gndhwc_gkzyxc_gndhwk_bf16_ins { add_device_operation_instances( instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_instances<3, - GNDHWC, - GKZYXC, - Empty_Tuple, - GNDHWK, - ConvFwdDefault>{}); + device_grouped_conv_fwd_wmma_cshufflev3_bf16_gnhwc_instances<3, + GNDHWC, + GKZYXC, + Empty_Tuple, + GNDHWK, + ConvFwdDefault>{}); add_device_operation_instances( instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_instances<3, - GNDHWC, - GKZYXC, - Empty_Tuple, - GNDHWK, - ConvFwd1x1P0>{}); + device_grouped_conv_fwd_wmma_cshufflev3_bf16_gnhwc_instances<3, + GNDHWC, + GKZYXC, + Empty_Tuple, + GNDHWK, + ConvFwd1x1P0>{}); add_device_operation_instances( instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_instances<3, - GNDHWC, - GKZYXC, - Empty_Tuple, - GNDHWK, - ConvFwd1x1S1P0>{}); + device_grouped_conv_fwd_wmma_cshufflev3_bf16_gnhwc_instances<3, + GNDHWC, + GKZYXC, + Empty_Tuple, + GNDHWK, + ConvFwd1x1S1P0>{}); } } // namespace instance diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp deleted file mode 100644 index 850fca5305..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { - -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances( - std::vector>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<3, - NDHWGC, - GKZYXC, - Empty_Tuple, - NDHWGK, - ConvFwdDefault>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<3, - NDHWGC, - GKZYXC, - Empty_Tuple, - NDHWGK, - ConvFwd1x1P0>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<3, - NDHWGC, - GKZYXC, - Empty_Tuple, - NDHWGK, - ConvFwd1x1S1P0>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_16x16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_instance.cpp similarity index 79% rename from library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_16x16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_instance.cpp index 480f1cb728..1d13c9a1fd 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_16x16_instance.cpp +++ b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_instance.cpp @@ -8,7 +8,7 @@ namespace ck { namespace tensor_operation { namespace device { namespace instance { -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_16x16_instances( +void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_instances( std::vector{}); add_device_operation_instances( instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<3, + device_grouped_conv_fwd_wmma_cshufflev3_bf16_ngchw_instances<3, NGCDHW, GKCZYX, Empty_Tuple, NGKDHW, ConvFwd1x1S1P0>{}); + add_device_operation_instances( + instances, + device_grouped_conv_fwd_wmma_cshufflev3_bf16_ngchw_instances<3, + NGCDHW, + GKCZYX, + Empty_Tuple, + NGKDHW, + ConvFwdOddC>{}); } } // namespace instance diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_instance.in b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_instance.in index 40a4e67927..8dc3ee2245 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_instance.in +++ b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_instance.in @@ -26,7 +26,7 @@ void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_ins { add_device_operation_instances( instances, - util::filter_tuple_by_modulo_t>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Empty_Tuple, - NDHWGK, - ConvFwdDefault, - Interwave>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Empty_Tuple, - NDHWGK, - ConvFwd1x1P0, - Interwave>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Empty_Tuple, - NDHWGK, - ConvFwd1x1S1P0, - Interwave>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp deleted file mode 100644 index a26184cd57..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_mem_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { - -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances( - std::vector>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Empty_Tuple, - NDHWGK, - ConvFwdDefault, - Intrawave>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Empty_Tuple, - NDHWGK, - ConvFwd1x1P0, - Intrawave>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Empty_Tuple, - NDHWGK, - ConvFwd1x1S1P0, - Intrawave>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instance.in b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instance.in deleted file mode 100644 index df1d4427b2..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instance.in +++ /dev/null @@ -1,66 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_mem_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/utility/filter_tuple.hpp" - -namespace ck::tensor_operation::device::instance { - -using device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances = - std::vector>>; -template -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances_shard( - device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_inter_instances& - instances) -{ - add_device_operation_instances( - instances, - ck::util::filter_tuple_by_modulo_t< - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NGCDHW, - GKCZYX, - Empty_Tuple, - NGKDHW, - ConvFwdDefault, - Interwave>, - Shards, - ShardIndex>{}); - add_device_operation_instances( - instances, - ck::util::filter_tuple_by_modulo_t< - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NGCDHW, - GKCZYX, - Empty_Tuple, - NGKDHW, - ConvFwd1x1P0, - Interwave>, - Shards, - ShardIndex>{}); - add_device_operation_instances( - instances, - ck::util::filter_tuple_by_modulo_t< - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NGCDHW, - GKCZYX, - Empty_Tuple, - NGKDHW, - ConvFwd1x1S1P0, - Interwave>, - Shards, - ShardIndex>{}); -} - -} // namespace ck::tensor_operation::device::instance diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instance.in b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instance.in deleted file mode 100644 index 274cf83add..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/mem/device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instance.in +++ /dev/null @@ -1,66 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_mem_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/utility/filter_tuple.hpp" - -namespace ck::tensor_operation::device::instance { - -using device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances = - std::vector>>; -template -void add_device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances_shard( - device_grouped_conv3d_fwd_wmma_cshufflev3_ngcdhw_gkczyx_ngkdhw_bf16_mem_intra_instances& - instances) -{ - add_device_operation_instances( - instances, - ck::util::filter_tuple_by_modulo_t< - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NGCDHW, - GKCZYX, - Empty_Tuple, - NGKDHW, - ConvFwdDefault, - Intrawave>, - Shards, - ShardIndex>{}); - add_device_operation_instances( - instances, - ck::util::filter_tuple_by_modulo_t< - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NGCDHW, - GKCZYX, - Empty_Tuple, - NGKDHW, - ConvFwd1x1P0, - Intrawave>, - Shards, - ShardIndex>{}); - add_device_operation_instances( - instances, - ck::util::filter_tuple_by_modulo_t< - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NGCDHW, - GKCZYX, - Empty_Tuple, - NGKDHW, - ConvFwd1x1S1P0, - Intrawave>, - Shards, - ShardIndex>{}); -} - -} // namespace ck::tensor_operation::device::instance diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeLists.txt index 4a8be238e2..dcb78bbbd5 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/CMakeLists.txt @@ -34,11 +34,7 @@ set(GROUPED_CONV3D_FWD # WMMA CSHUFFLE V3 wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp - wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp wmma/merged_groups/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp - wmma/mem/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp - wmma/mem/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp - wmma/comp/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_fp16_16x16_instance.cpp diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/comp/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/comp/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp deleted file mode 100644 index f72c6952cf..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/comp/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_comp_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/host_utility/device_prop.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { - -void add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances( - std::vector, - NDHWGK, - BF16, - BF16, - Tuple, - BF16, - PassThrough, - PassThrough, - AddClamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<3, - NDHWGC, - GKZYXC, - Tuple, - NDHWGK, - ConvFwdDefault, - Tuple, - AddClamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<3, - NDHWGC, - GKZYXC, - Tuple, - NDHWGK, - ConvFwd1x1P0, - Tuple, - AddClamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<3, - NDHWGC, - GKZYXC, - Tuple, - NDHWGK, - ConvFwd1x1S1P0, - Tuple, - AddClamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp deleted file mode 100644 index 8f363b0490..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { - -void add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances( - std::vector, - NDHWGK, - BF16, - BF16, - Tuple, - BF16, - PassThrough, - PassThrough, - AddClamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<3, - NDHWGC, - GKZYXC, - Tuple, - NDHWGK, - ConvFwdDefault, - Tuple, - AddClamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<3, - NDHWGC, - GKZYXC, - Tuple, - NDHWGK, - ConvFwd1x1P0, - Tuple, - AddClamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<3, - NDHWGC, - GKZYXC, - Tuple, - NDHWGK, - ConvFwd1x1S1P0, - Tuple, - AddClamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/mem/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/mem/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp deleted file mode 100644 index 0974594723..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/mem/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_mem_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { - -void add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances( - std::vector, - NDHWGK, - BF16, - BF16, - Tuple, - BF16, - PassThrough, - PassThrough, - AddClamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Tuple, - NDHWGK, - ConvFwdDefault, - Interwave, - Tuple, - AddClamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Tuple, - NDHWGK, - ConvFwd1x1P0, - Interwave, - Tuple, - AddClamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Tuple, - NDHWGK, - ConvFwd1x1S1P0, - Interwave, - Tuple, - AddClamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/mem/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/mem/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp deleted file mode 100644 index 944d83edd7..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/wmma/mem/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_mem_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { - -void add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances( - std::vector, - NDHWGK, - BF16, - BF16, - Tuple, - BF16, - PassThrough, - PassThrough, - AddClamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Tuple, - NDHWGK, - ConvFwdDefault, - Intrawave, - Tuple, - AddClamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Tuple, - NDHWGK, - ConvFwd1x1P0, - Intrawave, - Tuple, - AddClamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Tuple, - NDHWGK, - ConvFwd1x1S1P0, - Intrawave, - Tuple, - AddClamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeLists.txt index 026126bbf9..40c056f95f 100644 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/CMakeLists.txt @@ -34,11 +34,7 @@ set(GROUPED_CONV3D_FWD # WMMA CSHUFFLE V3 wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp - wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp wmma/merged_groups/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp - wmma/mem/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp - wmma/mem/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp - wmma/comp/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_fp16_16x16_instance.cpp diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/comp/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/comp/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp deleted file mode 100644 index 633f2f962d..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/comp/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_comp_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" -#include "ck/host_utility/device_prop.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { - -void add_device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances( - std::vector, - NDHWGK, - BF16, - BF16, - Tuple<>, - BF16, - PassThrough, - PassThrough, - Clamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<3, - NDHWGC, - GKZYXC, - Tuple<>, - NDHWGK, - ConvFwdDefault, - Tuple<>, - Clamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<3, - NDHWGC, - GKZYXC, - Tuple<>, - NDHWGK, - ConvFwd1x1P0, - Tuple<>, - Clamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_comp_instances<3, - NDHWGC, - GKZYXC, - Tuple<>, - NDHWGK, - ConvFwd1x1S1P0, - Tuple<>, - Clamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp deleted file mode 100644 index 0f74c9d215..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { - -void add_device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances( - std::vector, - NDHWGK, - BF16, - BF16, - Tuple<>, - BF16, - PassThrough, - PassThrough, - Clamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<3, - NDHWGC, - GKZYXC, - Tuple<>, - NDHWGK, - ConvFwdDefault, - Tuple<>, - Clamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<3, - NDHWGC, - GKZYXC, - Tuple<>, - NDHWGK, - ConvFwd1x1P0, - Tuple<>, - Clamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_16x16_instances<3, - NDHWGC, - GKZYXC, - Tuple<>, - NDHWGK, - ConvFwd1x1S1P0, - Tuple<>, - Clamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/mem/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/mem/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp deleted file mode 100644 index 85984d95c3..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/mem/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_mem_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { - -void add_device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances( - std::vector, - NDHWGK, - BF16, - BF16, - Tuple<>, - BF16, - PassThrough, - PassThrough, - Clamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Tuple<>, - NDHWGK, - ConvFwdDefault, - Interwave, - Tuple<>, - Clamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Tuple<>, - NDHWGK, - ConvFwd1x1P0, - Interwave, - Tuple<>, - Clamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Tuple<>, - NDHWGK, - ConvFwd1x1S1P0, - Interwave, - Tuple<>, - Clamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/mem/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/mem/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp deleted file mode 100644 index 3abd544249..0000000000 --- a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/wmma/mem/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - -#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_mem_instance.hpp" -#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" - -namespace ck { -namespace tensor_operation { -namespace device { -namespace instance { - -void add_device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances( - std::vector, - NDHWGK, - BF16, - BF16, - Tuple<>, - BF16, - PassThrough, - PassThrough, - Clamp>>>& instances) -{ - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Tuple<>, - NDHWGK, - ConvFwdDefault, - Intrawave, - Tuple<>, - Clamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Tuple<>, - NDHWGK, - ConvFwd1x1P0, - Intrawave, - Tuple<>, - Clamp>{}); - add_device_operation_instances( - instances, - device_grouped_conv_fwd_wmma_cshufflev3_bf16_mem_instances<3, - NDHWGC, - GKZYXC, - Tuple<>, - NDHWGK, - ConvFwd1x1S1P0, - Intrawave, - Tuple<>, - Clamp>{}); -} - -} // namespace instance -} // namespace device -} // namespace tensor_operation -} // namespace ck