[rocm-libraries] ROCm/rocm-libraries#5516 (commit ff3afda)

[CK_TILE, CK_BUILDER] Add bwd data to CK Tile profiler
 (#5516)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Motivation

We want close the performance gap between old CK and CK Tile for bwd
data convolutions. To achieve this, we need tow things

- Configurations for the old CK kernel instances such that we can map
them into CK Tile instances.
- Support in CK profiler to run the CK Tile instance with the same API
as for old CK instances.

## Technical Details

Extracted kernel configurations from old CK. The codegen python script
for CK Tile convs is extended to support also bwd data. The generated
instances are added to the CMake build (target
`device_grouped_conv_bwd_data_tile_instances`).
A new profiler op (`grouped_conv_bwd_data_tile`) has been added to the
CK Profiler. The API is same as for old CK's profiler op
`grouped_conv_bwd_data`.
This commit is contained in:
Ville Pietilä
2026-03-25 14:36:11 +00:00
committed by assistant-librarian[bot]
parent 1834e318da
commit ec2dbfbfde
29 changed files with 1588 additions and 956 deletions

View File

@@ -296,45 +296,45 @@ struct InstanceTraits<
oss << ","
<< detail::conv_bwd_data_spec_name(
kConvBwdDataSpecialization); // 14. ConvBackwardDataSpecialization
oss << "," << kDoPadGemmM;
oss << "," << kDoPadGemmN;
oss << "," << kNumGemmKPrefetchStage;
oss << "," << kBlockSize; // 15. BlockSize
oss << "," << kMPerBlock; // 16. MPerBlock
oss << "," << kNPerBlock; // 17. NPerBlock
oss << "," << kK0PerBlock; // 18. K0PerBlock
oss << "," << kAK1; // 19. AK1
oss << "," << kBK1; // 19. BK1
oss << "," << kMPerXDL; // 20. MPerXDL
oss << "," << kNPerXDL; // 21. NPerXDL
oss << "," << kMXdlPerWave; // 22. MXdlPerWave
oss << "," << kNXdlPerWave; // 23. NXdlPerWave
oss << "," << detail::sequence_name<ABlockTransferThreadClusterLengths_K0_M_K1>(); // 24.
oss << "," << detail::sequence_name<ABlockTransferThreadClusterArrangeOrder>(); // 25.
oss << "," << detail::sequence_name<ABlockTransferSrcAccessOrder>(); // 26.
oss << "," << kABlockTransferSrcVectorDim; // 27.
oss << "," << kABlockTransferSrcScalarPerVector; // 28.
oss << "," << kABlockTransferDstScalarPerVectorK1; // 29.
oss << "," << (kABlockLdsExtraM ? "true" : "false"); // 30.
oss << "," << detail::sequence_name<BBlockTransferThreadClusterLengths_K0_N_K1>(); // 31.
oss << "," << detail::sequence_name<BBlockTransferThreadClusterArrangeOrder>(); // 32.
oss << "," << detail::sequence_name<BBlockTransferSrcAccessOrder>(); // 33.
oss << "," << kBBlockTransferSrcVectorDim; // 34.
oss << "," << kBBlockTransferSrcScalarPerVector; // 35.
oss << "," << kBBlockTransferDstScalarPerVectorK1; // 36.
oss << "," << (kBBlockLdsExtraN ? "true" : "false"); // 37.
oss << "," << kCShuffleMXdlPerWavePerShuffle; // 38.
oss << "," << kCShuffleNXdlPerWavePerShuffle; // 39.
oss << "," << kDoPadGemmM; // 15. GEMM padding for M dimension
oss << "," << kDoPadGemmN; // 16. GEMM padding for N dimension
oss << "," << kNumGemmKPrefetchStage; // 17. Number of GEMM K prefetch stages
oss << "," << kBlockSize; // 18. BlockSize
oss << "," << kMPerBlock; // 19. MPerBlock
oss << "," << kNPerBlock; // 20. NPerBlock
oss << "," << kK0PerBlock; // 21. K0PerBlock
oss << "," << kAK1; // 22. AK1
oss << "," << kBK1; // 23. BK1
oss << "," << kMPerXDL; // 24. MPerXDL
oss << "," << kNPerXDL; // 25. NPerXDL
oss << "," << kMXdlPerWave; // 26. MXdlPerWave
oss << "," << kNXdlPerWave; // 27. NXdlPerWave
oss << "," << detail::sequence_name<ABlockTransferThreadClusterLengths_K0_M_K1>(); // 28.
oss << "," << detail::sequence_name<ABlockTransferThreadClusterArrangeOrder>(); // 29.
oss << "," << detail::sequence_name<ABlockTransferSrcAccessOrder>(); // 30.
oss << "," << kABlockTransferSrcVectorDim; // 31.
oss << "," << kABlockTransferSrcScalarPerVector; // 32.
oss << "," << kABlockTransferDstScalarPerVectorK1; // 33.
oss << "," << (kABlockLdsExtraM ? "true" : "false"); // 34.
oss << "," << detail::sequence_name<BBlockTransferThreadClusterLengths_K0_N_K1>(); // 35.
oss << "," << detail::sequence_name<BBlockTransferThreadClusterArrangeOrder>(); // 36.
oss << "," << detail::sequence_name<BBlockTransferSrcAccessOrder>(); // 37.
oss << "," << kBBlockTransferSrcVectorDim; // 38.
oss << "," << kBBlockTransferSrcScalarPerVector; // 39.
oss << "," << kBBlockTransferDstScalarPerVectorK1; // 40.
oss << "," << (kBBlockLdsExtraN ? "true" : "false"); // 41.
oss << "," << kCShuffleMXdlPerWavePerShuffle; // 42.
oss << "," << kCShuffleNXdlPerWavePerShuffle; // 43.
oss << ","
<< detail::sequence_name<
CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>(); // 40.
oss << "," << kCBlockTransferScalarPerVector_NWaveNPerXdl; // 42.
oss << "," << kNumGemmKPrefetchStage; // 41.
oss << "," << detail::loop_scheduler_name(kLoopScheduler); // 43. LoopSched
oss << "," << detail::type_name<ComputeTypeA>(); // 44.
oss << "," << detail::type_name<ComputeTypeB>(); // 45.
oss << "," << kMaxTransposeTransferSrcScalarPerVector; // 46.
oss << "," << kMaxTransposeTransferDstScalarPerVector; // 47.
CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>(); // 44.
oss << "," << kCBlockTransferScalarPerVector_NWaveNPerXdl; // 45.
oss << "," << kNumGemmKPrefetchStage; // 46.
oss << "," << detail::loop_scheduler_name(kLoopScheduler); // 47. LoopSched
oss << "," << detail::type_name<ComputeTypeA>(); // 48.
oss << "," << detail::type_name<ComputeTypeB>(); // 49.
oss << "," << kMaxTransposeTransferSrcScalarPerVector; // 50.
oss << "," << kMaxTransposeTransferDstScalarPerVector; // 51.
oss << ">";

View File

@@ -0,0 +1,71 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#pragma once
#include "ck_tile/builder/testing/tensor_initialization.hpp"
#include "ck_tile/builder/testing/testing_reflect.hpp"
#include "ck_tile/builder/testing/conv/args.hpp"
#include "ck_tile/builder/testing/conv/fwd.hpp"
#include "ck_tile/builder/testing/error.hpp"
/// This file deals with the backward data-specific details of running grouped
/// convolution backwards data operations. It mainly defines the data
/// structures (`Input` and `Output`), initialization, and validation. Note
/// that for this operation specifically, many of the operations are
/// implemented automatically via testing_reflect.hpp.
namespace ck_tile::builder::test {
/// @brief `Inputs` specialization for backwards data convolution.
///
/// @tparam SIGNATURE Backwards data convolution signature.
///
/// @see Inputs
template <auto SIGNATURE>
requires ValidConvSignature<SIGNATURE> && ConvDirectionIsBackwardData<SIGNATURE>
struct Inputs<SIGNATURE>
{
void* weight;
void* output;
// See testing_reflect.hpp
static void reflect(const Args<SIGNATURE>& args, const auto& inspect)
{
inspect("weight", args.make_weight_descriptor(), &Inputs<SIGNATURE>::weight);
inspect("output", args.make_output_descriptor(), &Inputs<SIGNATURE>::output);
}
};
/// @brief `Outputs` specialization for backwards data convolution.
///
/// @tparam SIGNATURE Backward data convolution signature.
///
/// @see Outputs
template <auto SIGNATURE>
requires ValidConvSignature<SIGNATURE> && ConvDirectionIsBackwardData<SIGNATURE>
struct Outputs<SIGNATURE>
{
void* input;
// See testing_reflect.hpp
static void reflect(const Args<SIGNATURE>& args, const auto& inspect)
{
inspect("input", args.make_input_descriptor(), &Outputs<SIGNATURE>::input);
}
};
/// @brief `init_inputs()` specialization for backwards convolution.
///
/// @tparam SIGNATURE Backward data convolution signature.
///
/// @see init_inputs()
template <auto SIGNATURE>
requires ValidConvSignature<SIGNATURE> && ConvDirectionIsBackwardData<SIGNATURE>
void init_inputs(const Args<SIGNATURE>& args, Inputs<SIGNATURE> inputs)
{
init_tensor_buffer_uniform_fp(inputs.weight, args.make_weight_descriptor(), -2.0f, 2.0f);
init_tensor_buffer_uniform_fp(inputs.output, args.make_output_descriptor(), -2.0f, 2.0f);
}
} // namespace ck_tile::builder::test

View File

@@ -6,6 +6,7 @@
#include "ck_tile/builder/testing/testing.hpp"
#include "ck_tile/builder/testing/conv/fwd.hpp"
#include "ck_tile/builder/testing/conv/bwd_weight.hpp"
#include "ck_tile/builder/testing/conv/bwd_data.hpp"
#include "ck_tile/builder/factory/helpers/ck_tile/conv_tile_tensor_type.hpp"
#include "ck_tile/host/kernel_launch.hpp"
#include "ck_tile/ops/gemm.hpp"
@@ -35,6 +36,29 @@ concept CkTileConvInstance = requires(Conv&) {
{ Conv::BlockSize() };
};
template <auto SIGNATURE>
std::size_t gemm_split_k_output_size(auto kargs)
{
std::size_t zeroing_size = 0;
if constexpr(ConvDirectionIsBackwardWeight<SIGNATURE>)
{
zeroing_size = std::accumulate(std::begin(kargs.wei_g_k_c_xs_lengths.data),
std::end(kargs.wei_g_k_c_xs_lengths.data),
1,
std::multiplies<std::size_t>());
}
if constexpr(ConvDirectionIsBackwardData<SIGNATURE>)
{
zeroing_size = std::accumulate(std::begin(kargs.in_g_n_c_wis_lengths.data),
std::end(kargs.in_g_n_c_wis_lengths.data),
1,
std::multiplies<std::size_t>());
}
return zeroing_size;
}
template <auto SIGNATURE, typename InDataType, typename WeiDataType, typename OutDataType>
[[nodiscard]] RunResult run(CkTileConvInstance<SIGNATURE> auto& conv,
const Args<SIGNATURE>& args,
@@ -58,10 +82,8 @@ template <auto SIGNATURE, typename InDataType, typename WeiDataType, typename Ou
return RunResult::not_supported("unsupported ck_tile arguments");
using Types = ck_tile::builder::factory::internal::TileConvTensorTypes<SIGNATURE.data_type>;
const std::size_t zeroing_size = std::accumulate(std::begin(kargs.wei_g_k_c_xs_lengths.data),
std::end(kargs.wei_g_k_c_xs_lengths.data),
1,
std::multiplies<std::size_t>());
const std::size_t zeroing_size = gemm_split_k_output_size<SIGNATURE>(kargs);
auto preprocess = [&]() {
if constexpr(ConvDirectionIsBackwardWeight<SIGNATURE>)
@@ -75,6 +97,18 @@ template <auto SIGNATURE, typename InDataType, typename WeiDataType, typename Ou
s_conf.stream_id_));
}
}
if constexpr(ConvDirectionIsBackwardData<SIGNATURE>)
{
if(kargs.k_batch > 1)
{
ck_tile::hip_check_error(
hipMemsetAsync(kargs.in_ptr,
0,
zeroing_size * sizeof(typename Types::EDataType),
s_conf.stream_id_));
}
}
};
constexpr index_t minimum_occupancy =
@@ -293,4 +327,26 @@ template <auto SIGNATURE>
s_conf);
}
/// @brief `run()` specialization for backwards data convolution and CK Tile.
///
/// @tparam SIGNATURE Backward data convolution signature.
/// @returns RunResult about how the operation completed (or not).
///
/// @see run()
template <auto SIGNATURE>
requires ConvDirectionIsBackwardData<SIGNATURE>
[[nodiscard]] RunResult run(CkTileConvInstance<SIGNATURE> auto& conv,
const Args<SIGNATURE>& args,
const Inputs<SIGNATURE>& inputs,
const Outputs<SIGNATURE>& outputs,
const ck_tile::stream_config s_conf = {})
{
return detail::run(conv,
args,
static_cast<void*>(outputs.input),
static_cast<const void*>(inputs.weight),
static_cast<const void*>(inputs.output),
s_conf);
}
} // namespace ck_tile::builder::test

View File

@@ -134,4 +134,26 @@ template <auto SIGNATURE>
return detail::run(conv, args, inputs.input, outputs.weight, inputs.output);
}
/// @brief Concept for checking whether this is the reference convolution
/// backward data implementation.
template <typename Conv, auto SIGNATURE>
concept RefConvBwdDataInstance =
detail::RefConvInstance<Conv, SIGNATURE, void*, const void*, const void*> &&
ConvDirectionIsBackwardData<SIGNATURE>;
/// @brief `run()` specialization for the reference backward data implementation.
///
/// @tparam SIGNATURE The signature of the operation to perform. Must be backwards data.
/// @returns RunResult about how the operation completed (or not).
///
/// @see run()
template <auto SIGNATURE>
[[nodiscard]] RunResult run(RefConvBwdDataInstance<SIGNATURE> auto& conv,
const Args<SIGNATURE>& args,
const Inputs<SIGNATURE>& inputs,
const Outputs<SIGNATURE>& outputs)
{
return detail::run(conv, args, outputs.input, inputs.weight, inputs.output);
}
} // namespace ck_tile::builder::test

View File

@@ -31,6 +31,11 @@ if(GPU_TARGETS MATCHES "gfx9")
add_instance_library(device_grouped_conv_bwd_weight_tile_instances ${GROUPED_CONV_BWD_WEIGHT_TILE})
target_include_directories(device_grouped_conv_bwd_weight_tile_instances PRIVATE
"${PROJECT_SOURCE_DIR}/experimental/builder/test/utils")
target_compile_options(device_grouped_conv_bwd_weight_tile_instances PRIVATE -DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=0)
file(GLOB_RECURSE GROUPED_CONV_BWD_DATA_TILE "${CMAKE_CURRENT_BINARY_DIR}/backward_data/*.cpp")
add_instance_library(device_grouped_conv_bwd_data_tile_instances ${GROUPED_CONV_BWD_DATA_TILE})
target_include_directories(device_grouped_conv_bwd_data_tile_instances PRIVATE
"${PROJECT_SOURCE_DIR}/experimental/builder/test/utils")
target_compile_options(device_grouped_conv_bwd_data_tile_instances PRIVATE -DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=0)
endif()

View File

@@ -1,82 +1,82 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>

View File

@@ -1,82 +1,82 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>

View File

@@ -1,70 +1,70 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>

View File

@@ -1,82 +1,82 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>

View File

@@ -1,82 +1,82 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>

View File

@@ -1,70 +1,70 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>

View File

@@ -1,16 +1,16 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>

View File

@@ -1,16 +1,16 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>

View File

@@ -1,14 +1,14 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>

View File

@@ -1,16 +1,16 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>

View File

@@ -1,16 +1,16 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>

View File

@@ -1,14 +1,14 @@
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 4, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>

View File

@@ -144,6 +144,7 @@ def copy_includes(instances_path):
output_dir.mkdir(parents=True, exist_ok=True)
shutil.copy(f"{inc_dir}/include/instance_includes.inc", instances_path)
shutil.copy(f"{inc_dir}/include/instance_run.inc", instances_path)
shutil.copy(f"{inc_dir}/include/signatures.hpp", instances_path)
def generate_calls_inc(instances, problem_name, direction, filter_pattern):
generate_dir = Path(__file__).resolve().parent
@@ -467,8 +468,131 @@ def parse_bwd_weight_instances(instances, problem_name):
def parse_bwd_data_instances(instances, problem_name):
convs = []
print("Parsing backward data instances is not supported yet, skipping all instances.")
# TODO: Implement parsing logic for backward data instances.
for instance_id, instance in enumerate(instances):
if instance.find("#") != -1 or instance.find(";") != -1:
continue
start = instance.index('<') + 1
end = instance.rindex('>')
params_str = instance[start:end]
args = parse_instance_string(params_str)
is_v1_instance = instance.find("Xdl_CShuffle<") != -1
if is_v1_instance:
if len(args) != 51:
raise RuntimeError(f"Wrong number of parameters in the V1 XDL CShuffle instance string: {instance}\n" +
f"Expected 51 parameters for V1 instance. Found {len(args)} parameters.")
else:
raise RuntimeError(f"Only V1 XDL CShuffle instances are supported for backward data. Found instance: {instance}")
spec = args[13]
block_size = int(args[17])
m_per_block = int(args[18])
n_per_block = int(args[19])
k_per_block = int(args[20])
ak1 = int(args[21])
bk1 = int(args[22])
m_per_xdl = int(args[23])
n_per_xdl = int(args[24])
m_xdl_per_wave = int(args[25])
n_xdl_per_wave = int(args[26])
a_scalar_per_vector = int(args[31])
b_scalar_per_vector = int(args[38])
c_scalar_per_vector = int(args[44])
if ak1 != bk1:
raise RuntimeError(f"Not supported instance {instance_id} since ak1 != bk1. ak1: {ak1}, bk1: {bk1} in instance: {instance}")
k1 = min(ak1, bk1)
# TODO: Do we need split image for 3D bwd data convs?
split_image = False
# Default optimization parameters
num_groups_to_merge = 1
is_two_stage_instance = False
is_explicit_gemm = False
num_wave_groups = 1
direct_load = False
# Block GEMM pipeline parameters
block_gemm_pipeline_scheduler = args[46]
if block_gemm_pipeline_scheduler == "Default":
block_gemm_pipeline_scheduler = "Intrawave"
blk_gemm_pipeline_version = "v1"
if block_gemm_pipeline_scheduler == "Interwave":
blk_gemm_pipeline_version = "v1"
# Sanity check for Block GEMM pipeline parameters
# Scheduler must be either Intrawave or Interwave.
# Version must be from v1 to v5
if block_gemm_pipeline_scheduler not in ["Intrawave", "Interwave"]:
raise RuntimeError(f"Invalid Block GEMM pipeline scheduler: {block_gemm_pipeline_scheduler} in instance: {instance}")
if blk_gemm_pipeline_version not in ["v1", "v2", "v3", "v4", "v5"]:
raise RuntimeError(f"Invalid Block GEMM pipeline version: {blk_gemm_pipeline_version} in instance: {instance}")
double_smem_buffer = blk_gemm_pipeline_version == "v4"
scheduler = block_gemm_pipeline_scheduler
pipeline_version = blk_gemm_pipeline_version.upper()
# Old CK pipeline version V5 maps to V6 for CK Tile
if pipeline_version == "V5":
pipeline_version = "V6"
if direct_load:
if pipeline_version == "V1":
pipeline_version = "ASYNC_V1"
elif pipeline_version == "V4":
pipeline_version = "ASYNC_V4"
else:
raise RuntimeError(
f"Not supported pipeline for direct load: pipeline_version={pipeline_version} in instance: {instance}"
)
m_warp = int(m_per_block / (m_per_xdl * m_xdl_per_wave))
n_warp = int(n_per_block / (n_per_xdl * n_xdl_per_wave))
warp_size = 64
k_warp = int(block_size / (warp_size * m_warp * n_warp))
dtype = get_dtype(problem_name)
k_per_xdl = max(k1, get_k_mfma(dtype, m_per_xdl, n_per_xdl))
if check_vectors(a_scalar_per_vector, b_scalar_per_vector, c_scalar_per_vector) == False:
print(f"Skipping instance {instance_id} with irregular load since it's not supported yet.")
continue
if pipeline_version == "V6":
print(f"Skipping instance {instance_id} with V6 since it's not supported yet.")
continue
# Check vector sizes for A and B tensors - we cannot oversubscribe.
num_tile_elements_a = m_per_xdl * k_per_xdl
num_tile_elements_b = n_per_xdl * k_per_xdl
max_vector_size_a = max(1, num_tile_elements_a // block_size)
max_vector_size_b = max(1, num_tile_elements_b // block_size)
a_scalar_per_vector = min(a_scalar_per_vector, max_vector_size_a)
b_scalar_per_vector = min(b_scalar_per_vector, max_vector_size_b)
conv = ConvInstanceTemplateParams(
spec,
[m_per_block, n_per_block, k_per_block],
[m_warp, n_warp, k_warp],
[m_per_xdl, n_per_xdl, k_per_xdl],
double_smem_buffer,
num_wave_groups,
is_two_stage_instance,
pipeline_version,
scheduler,
[a_scalar_per_vector, b_scalar_per_vector, c_scalar_per_vector],
num_groups_to_merge,
split_image,
is_explicit_gemm,
instance_id,
)
convs.append(conv)
return convs
def generate_instances_fwd(instances, problem_name, config, filter_pattern, instances_path):

View File

@@ -1,177 +1,8 @@
#include "../../builder/test/utils/ckb_conv_tile_test_configs.hpp"
#include "ck_tile/builder/testing/conv/fwd.hpp"
#include "ck_tile/builder/testing/conv/bwd_weight.hpp"
#include "ck_tile/builder/testing/conv/ck_tile.hpp"
#include "ck_tile/builder/testing/conv/bwd_data.hpp"
#include "signatures.hpp"
namespace ckb = ck_tile::builder;
namespace ckt = ck_tile::builder::test;
namespace cku = ck_tile::builder::test_utils;
namespace ckf = ck_tile::builder::factory;
namespace ck_tile::builder::profiling {
constexpr auto SIGNATURE_NHWGC_FP32_FWD =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_BF16_FWD =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_FP16_FWD =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP32_FWD =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_BF16_FWD =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP16_FWD =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
// Backward Weight Signatures
constexpr auto SIGNATURE_NHWGC_FP32_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_BF16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_FP16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP32_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_BF16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
// Backward Data Signatures
constexpr auto SIGNATURE_NHWGC_FP32_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_BF16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_FP16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP32_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_BF16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
} // namespace ck_tile::builder::profiling

View File

@@ -0,0 +1,186 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#pragma once
#include <tuple>
#include "../../builder/test/impl/conv_signature_types.hpp"
#include "ck_tile/builder/testing/conv/ck_tile.hpp"
namespace ck_tile::builder::profiling {
namespace ckb = ck_tile::builder;
namespace ckt = ck_tile::builder::test;
constexpr auto SIGNATURE_NHWGC_FP32_FWD =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_BF16_FWD =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_FP16_FWD =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP32_FWD =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_BF16_FWD =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP16_FWD =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::FORWARD,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
/////////////////////////////////////////
// BWD WEIGHT signatures
//////////////////////////////////////////
constexpr auto SIGNATURE_NHWGC_BF16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_FP16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_FP32_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NDHWGC_BF16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP16_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP32_BWD_WEIGHT =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
/////////////////////////////////////////
// BWD DATA signatures
//////////////////////////////////////////
constexpr auto SIGNATURE_NHWGC_BF16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_FP16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NHWGC_FP32_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 2,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
constexpr auto SIGNATURE_NDHWGC_BF16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::BF16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP16_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP16,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
constexpr auto SIGNATURE_NDHWGC_FP32_BWD_DATA =
ckt::ConvSignature{.spatial_dim = 3,
.direction = ckb::ConvDirection::BACKWARD_DATA,
.data_type = ckb::DataType::FP32,
.accumulation_data_type = ckb::DataType::FP32,
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
} // namespace ck_tile::builder::profiling

View File

@@ -50,6 +50,7 @@ def compile_single_file(cpp_file: Path, project_root: Path, gpu_target: str, ver
"-D__HIP_PLATFORM_AMD__",
"-D CK_EXPERIMENTAL_BUILDER=ON",
"-O3",
"-Wno-unknown-warning-option",
*include_flags,
str(cpp_file),
"-o", str(output_file)
@@ -63,10 +64,15 @@ def compile_single_file(cpp_file: Path, project_root: Path, gpu_target: str, ver
timeout=300 # 5 minute timeout per file
)
print(f"\n\n Command: {' '.join(cmd)}\n") if verbose else None
if result.returncode == 0:
return True, ""
else:
# Extract the key error message
if verbose and result.stderr:
print(f" {result.stderr}")
print()
error_output = result.stderr
return False, error_output