mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 14:29:05 +00:00
[rocm-libraries] ROCm/rocm-libraries#5516 (commit ff3afda)
[CK_TILE, CK_BUILDER] Add bwd data to CK Tile profiler (#5516) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Motivation We want to close the performance gap between old CK and CK Tile for bwd data convolutions. To achieve this, we need two things: - Configurations for the old CK kernel instances such that we can map them into CK Tile instances. - Support in CK profiler to run the CK Tile instance with the same API as for old CK instances. ## Technical Details Extracted kernel configurations from old CK. The codegen python script for CK Tile convs is extended to also support bwd data. The generated instances are added to the CMake build (target `device_grouped_conv_bwd_data_tile_instances`). A new profiler op (`grouped_conv_bwd_data_tile`) has been added to the CK Profiler. The API is the same as for old CK's profiler op `grouped_conv_bwd_data`.
This commit is contained in:
committed by
assistant-librarian[bot]
parent
1834e318da
commit
ec2dbfbfde
1
.gitignore
vendored
1
.gitignore
vendored
@@ -112,4 +112,5 @@ test_data/*
|
||||
experimental/grouped_convolution_tile_instances/instances/*
|
||||
!experimental/grouped_convolution_tile_instances/instances/*.in
|
||||
!experimental/grouped_convolution_tile_instances/instances/*.inc
|
||||
!experimental/grouped_convolution_tile_instances/instances/*.hpp
|
||||
experimental/grouped_convolution_tile_instances/*.inc
|
||||
|
||||
@@ -296,45 +296,45 @@ struct InstanceTraits<
|
||||
oss << ","
|
||||
<< detail::conv_bwd_data_spec_name(
|
||||
kConvBwdDataSpecialization); // 14. ConvBackwardDataSpecialization
|
||||
oss << "," << kDoPadGemmM;
|
||||
oss << "," << kDoPadGemmN;
|
||||
oss << "," << kNumGemmKPrefetchStage;
|
||||
oss << "," << kBlockSize; // 15. BlockSize
|
||||
oss << "," << kMPerBlock; // 16. MPerBlock
|
||||
oss << "," << kNPerBlock; // 17. NPerBlock
|
||||
oss << "," << kK0PerBlock; // 18. K0PerBlock
|
||||
oss << "," << kAK1; // 19. AK1
|
||||
oss << "," << kBK1; // 19. BK1
|
||||
oss << "," << kMPerXDL; // 20. MPerXDL
|
||||
oss << "," << kNPerXDL; // 21. NPerXDL
|
||||
oss << "," << kMXdlPerWave; // 22. MXdlPerWave
|
||||
oss << "," << kNXdlPerWave; // 23. NXdlPerWave
|
||||
oss << "," << detail::sequence_name<ABlockTransferThreadClusterLengths_K0_M_K1>(); // 24.
|
||||
oss << "," << detail::sequence_name<ABlockTransferThreadClusterArrangeOrder>(); // 25.
|
||||
oss << "," << detail::sequence_name<ABlockTransferSrcAccessOrder>(); // 26.
|
||||
oss << "," << kABlockTransferSrcVectorDim; // 27.
|
||||
oss << "," << kABlockTransferSrcScalarPerVector; // 28.
|
||||
oss << "," << kABlockTransferDstScalarPerVectorK1; // 29.
|
||||
oss << "," << (kABlockLdsExtraM ? "true" : "false"); // 30.
|
||||
oss << "," << detail::sequence_name<BBlockTransferThreadClusterLengths_K0_N_K1>(); // 31.
|
||||
oss << "," << detail::sequence_name<BBlockTransferThreadClusterArrangeOrder>(); // 32.
|
||||
oss << "," << detail::sequence_name<BBlockTransferSrcAccessOrder>(); // 33.
|
||||
oss << "," << kBBlockTransferSrcVectorDim; // 34.
|
||||
oss << "," << kBBlockTransferSrcScalarPerVector; // 35.
|
||||
oss << "," << kBBlockTransferDstScalarPerVectorK1; // 36.
|
||||
oss << "," << (kBBlockLdsExtraN ? "true" : "false"); // 37.
|
||||
oss << "," << kCShuffleMXdlPerWavePerShuffle; // 38.
|
||||
oss << "," << kCShuffleNXdlPerWavePerShuffle; // 39.
|
||||
oss << "," << kDoPadGemmM; // 15. GEMM padding for M dimension
|
||||
oss << "," << kDoPadGemmN; // 16. GEMM padding for N dimension
|
||||
oss << "," << kNumGemmKPrefetchStage; // 17. Number of GEMM K prefetch stages
|
||||
oss << "," << kBlockSize; // 18. BlockSize
|
||||
oss << "," << kMPerBlock; // 19. MPerBlock
|
||||
oss << "," << kNPerBlock; // 20. NPerBlock
|
||||
oss << "," << kK0PerBlock; // 21. K0PerBlock
|
||||
oss << "," << kAK1; // 22. AK1
|
||||
oss << "," << kBK1; // 23. BK1
|
||||
oss << "," << kMPerXDL; // 24. MPerXDL
|
||||
oss << "," << kNPerXDL; // 25. NPerXDL
|
||||
oss << "," << kMXdlPerWave; // 26. MXdlPerWave
|
||||
oss << "," << kNXdlPerWave; // 27. NXdlPerWave
|
||||
oss << "," << detail::sequence_name<ABlockTransferThreadClusterLengths_K0_M_K1>(); // 28.
|
||||
oss << "," << detail::sequence_name<ABlockTransferThreadClusterArrangeOrder>(); // 29.
|
||||
oss << "," << detail::sequence_name<ABlockTransferSrcAccessOrder>(); // 30.
|
||||
oss << "," << kABlockTransferSrcVectorDim; // 31.
|
||||
oss << "," << kABlockTransferSrcScalarPerVector; // 32.
|
||||
oss << "," << kABlockTransferDstScalarPerVectorK1; // 33.
|
||||
oss << "," << (kABlockLdsExtraM ? "true" : "false"); // 34.
|
||||
oss << "," << detail::sequence_name<BBlockTransferThreadClusterLengths_K0_N_K1>(); // 35.
|
||||
oss << "," << detail::sequence_name<BBlockTransferThreadClusterArrangeOrder>(); // 36.
|
||||
oss << "," << detail::sequence_name<BBlockTransferSrcAccessOrder>(); // 37.
|
||||
oss << "," << kBBlockTransferSrcVectorDim; // 38.
|
||||
oss << "," << kBBlockTransferSrcScalarPerVector; // 39.
|
||||
oss << "," << kBBlockTransferDstScalarPerVectorK1; // 40.
|
||||
oss << "," << (kBBlockLdsExtraN ? "true" : "false"); // 41.
|
||||
oss << "," << kCShuffleMXdlPerWavePerShuffle; // 42.
|
||||
oss << "," << kCShuffleNXdlPerWavePerShuffle; // 43.
|
||||
oss << ","
|
||||
<< detail::sequence_name<
|
||||
CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>(); // 40.
|
||||
oss << "," << kCBlockTransferScalarPerVector_NWaveNPerXdl; // 42.
|
||||
oss << "," << kNumGemmKPrefetchStage; // 41.
|
||||
oss << "," << detail::loop_scheduler_name(kLoopScheduler); // 43. LoopSched
|
||||
oss << "," << detail::type_name<ComputeTypeA>(); // 44.
|
||||
oss << "," << detail::type_name<ComputeTypeB>(); // 45.
|
||||
oss << "," << kMaxTransposeTransferSrcScalarPerVector; // 46.
|
||||
oss << "," << kMaxTransposeTransferDstScalarPerVector; // 47.
|
||||
CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock>(); // 44.
|
||||
oss << "," << kCBlockTransferScalarPerVector_NWaveNPerXdl; // 45.
|
||||
oss << "," << kNumGemmKPrefetchStage; // 46.
|
||||
oss << "," << detail::loop_scheduler_name(kLoopScheduler); // 47. LoopSched
|
||||
oss << "," << detail::type_name<ComputeTypeA>(); // 48.
|
||||
oss << "," << detail::type_name<ComputeTypeB>(); // 49.
|
||||
oss << "," << kMaxTransposeTransferSrcScalarPerVector; // 50.
|
||||
oss << "," << kMaxTransposeTransferDstScalarPerVector; // 51.
|
||||
|
||||
oss << ">";
|
||||
|
||||
|
||||
@@ -0,0 +1,71 @@
|
||||
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ck_tile/builder/testing/tensor_initialization.hpp"
|
||||
#include "ck_tile/builder/testing/testing_reflect.hpp"
|
||||
#include "ck_tile/builder/testing/conv/args.hpp"
|
||||
#include "ck_tile/builder/testing/conv/fwd.hpp"
|
||||
#include "ck_tile/builder/testing/error.hpp"
|
||||
|
||||
/// This file deals with the backward data-specific details of running grouped
|
||||
/// convolution backwards data operations. It mainly defines the data
|
||||
/// structures (`Inputs` and `Outputs`), initialization, and validation. Note
|
||||
/// that for this operation specifically, many of the operations are
|
||||
/// implemented automatically via testing_reflect.hpp.
|
||||
|
||||
namespace ck_tile::builder::test {
|
||||
|
||||
/// @brief `Inputs` specialization for backwards data convolution.
|
||||
///
|
||||
/// @tparam SIGNATURE Backwards data convolution signature.
|
||||
///
|
||||
/// @see Inputs
|
||||
template <auto SIGNATURE>
|
||||
requires ValidConvSignature<SIGNATURE> && ConvDirectionIsBackwardData<SIGNATURE>
|
||||
struct Inputs<SIGNATURE>
|
||||
{
|
||||
// Weight tensor buffer; reflect() pairs it with args.make_weight_descriptor().
void* weight;
|
||||
// Output tensor buffer, which is one of the inputs of the backward data
// operation; reflect() pairs it with args.make_output_descriptor().
void* output;
|
||||
|
||||
// See testing_reflect.hpp
|
||||
// Calls `inspect` once per field with the field name, the descriptor obtained
// from `args`, and a pointer-to-member identifying the field.
static void reflect(const Args<SIGNATURE>& args, const auto& inspect)
|
||||
{
|
||||
inspect("weight", args.make_weight_descriptor(), &Inputs<SIGNATURE>::weight);
|
||||
inspect("output", args.make_output_descriptor(), &Inputs<SIGNATURE>::output);
|
||||
}
|
||||
};
|
||||
|
||||
/// @brief `Outputs` specialization for backwards data convolution.
|
||||
///
|
||||
/// @tparam SIGNATURE Backward data convolution signature.
|
||||
///
|
||||
/// @see Outputs
|
||||
template <auto SIGNATURE>
|
||||
requires ValidConvSignature<SIGNATURE> && ConvDirectionIsBackwardData<SIGNATURE>
|
||||
struct Outputs<SIGNATURE>
|
||||
{
|
||||
// Input tensor buffer, which is the result of the backward data operation;
// reflect() pairs it with args.make_input_descriptor().
void* input;
|
||||
|
||||
// See testing_reflect.hpp
|
||||
// Calls `inspect` for the single field with its name, the descriptor obtained
// from `args`, and a pointer-to-member identifying the field.
static void reflect(const Args<SIGNATURE>& args, const auto& inspect)
|
||||
{
|
||||
inspect("input", args.make_input_descriptor(), &Outputs<SIGNATURE>::input);
|
||||
}
|
||||
};
|
||||
|
||||
/// @brief `init_inputs()` specialization for backwards data convolution.
|
||||
///
|
||||
/// @tparam SIGNATURE Backward data convolution signature.
|
||||
///
|
||||
/// @see init_inputs()
|
||||
template <auto SIGNATURE>
|
||||
requires ValidConvSignature<SIGNATURE> && ConvDirectionIsBackwardData<SIGNATURE>
|
||||
// `inputs` is taken by value; it only carries raw `void*` members, so the copy
// still refers to the caller's buffers.
void init_inputs(const Args<SIGNATURE>& args, Inputs<SIGNATURE> inputs)
|
||||
{
|
||||
// Both buffers are handed to init_tensor_buffer_uniform_fp together with
// their descriptor and the bounds -2.0f and 2.0f (presumably a uniform
// floating-point fill over that range; confirm against the helper's definition).
init_tensor_buffer_uniform_fp(inputs.weight, args.make_weight_descriptor(), -2.0f, 2.0f);
|
||||
init_tensor_buffer_uniform_fp(inputs.output, args.make_output_descriptor(), -2.0f, 2.0f);
|
||||
}
|
||||
|
||||
} // namespace ck_tile::builder::test
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "ck_tile/builder/testing/testing.hpp"
|
||||
#include "ck_tile/builder/testing/conv/fwd.hpp"
|
||||
#include "ck_tile/builder/testing/conv/bwd_weight.hpp"
|
||||
#include "ck_tile/builder/testing/conv/bwd_data.hpp"
|
||||
#include "ck_tile/builder/factory/helpers/ck_tile/conv_tile_tensor_type.hpp"
|
||||
#include "ck_tile/host/kernel_launch.hpp"
|
||||
#include "ck_tile/ops/gemm.hpp"
|
||||
@@ -35,6 +36,29 @@ concept CkTileConvInstance = requires(Conv&) {
|
||||
{ Conv::BlockSize() };
|
||||
};
|
||||
|
||||
template <auto SIGNATURE>
|
||||
std::size_t gemm_split_k_output_size(auto kargs)
|
||||
{
|
||||
std::size_t zeroing_size = 0;
|
||||
if constexpr(ConvDirectionIsBackwardWeight<SIGNATURE>)
|
||||
{
|
||||
zeroing_size = std::accumulate(std::begin(kargs.wei_g_k_c_xs_lengths.data),
|
||||
std::end(kargs.wei_g_k_c_xs_lengths.data),
|
||||
1,
|
||||
std::multiplies<std::size_t>());
|
||||
}
|
||||
|
||||
if constexpr(ConvDirectionIsBackwardData<SIGNATURE>)
|
||||
{
|
||||
zeroing_size = std::accumulate(std::begin(kargs.in_g_n_c_wis_lengths.data),
|
||||
std::end(kargs.in_g_n_c_wis_lengths.data),
|
||||
1,
|
||||
std::multiplies<std::size_t>());
|
||||
}
|
||||
|
||||
return zeroing_size;
|
||||
}
|
||||
|
||||
template <auto SIGNATURE, typename InDataType, typename WeiDataType, typename OutDataType>
|
||||
[[nodiscard]] RunResult run(CkTileConvInstance<SIGNATURE> auto& conv,
|
||||
const Args<SIGNATURE>& args,
|
||||
@@ -58,10 +82,8 @@ template <auto SIGNATURE, typename InDataType, typename WeiDataType, typename Ou
|
||||
return RunResult::not_supported("unsupported ck_tile arguments");
|
||||
|
||||
using Types = ck_tile::builder::factory::internal::TileConvTensorTypes<SIGNATURE.data_type>;
|
||||
const std::size_t zeroing_size = std::accumulate(std::begin(kargs.wei_g_k_c_xs_lengths.data),
|
||||
std::end(kargs.wei_g_k_c_xs_lengths.data),
|
||||
1,
|
||||
std::multiplies<std::size_t>());
|
||||
|
||||
const std::size_t zeroing_size = gemm_split_k_output_size<SIGNATURE>(kargs);
|
||||
|
||||
auto preprocess = [&]() {
|
||||
if constexpr(ConvDirectionIsBackwardWeight<SIGNATURE>)
|
||||
@@ -75,6 +97,18 @@ template <auto SIGNATURE, typename InDataType, typename WeiDataType, typename Ou
|
||||
s_conf.stream_id_));
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr(ConvDirectionIsBackwardData<SIGNATURE>)
|
||||
{
|
||||
if(kargs.k_batch > 1)
|
||||
{
|
||||
ck_tile::hip_check_error(
|
||||
hipMemsetAsync(kargs.in_ptr,
|
||||
0,
|
||||
zeroing_size * sizeof(typename Types::EDataType),
|
||||
s_conf.stream_id_));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
constexpr index_t minimum_occupancy =
|
||||
@@ -293,4 +327,26 @@ template <auto SIGNATURE>
|
||||
s_conf);
|
||||
}
|
||||
|
||||
/// @brief `run()` specialization for backwards data convolution and CK Tile.
|
||||
///
|
||||
/// @tparam SIGNATURE Backward data convolution signature.
|
||||
/// @returns RunResult about how the operation completed (or not).
|
||||
///
|
||||
/// @see run()
|
||||
template <auto SIGNATURE>
|
||||
requires ConvDirectionIsBackwardData<SIGNATURE>
|
||||
[[nodiscard]] RunResult run(CkTileConvInstance<SIGNATURE> auto& conv,
|
||||
const Args<SIGNATURE>& args,
|
||||
const Inputs<SIGNATURE>& inputs,
|
||||
const Outputs<SIGNATURE>& outputs,
|
||||
const ck_tile::stream_config s_conf = {})
|
||||
{
|
||||
// Forwards to detail::run with the tensors in (input, weight, output) order.
// The `input` tensor is what this operation produces, so it is taken from
// `outputs` and passed as a mutable `void*`; `weight` and `output` come from
// `inputs` and are passed as `const void*`.
return detail::run(conv,
|
||||
args,
|
||||
static_cast<void*>(outputs.input),
|
||||
static_cast<const void*>(inputs.weight),
|
||||
static_cast<const void*>(inputs.output),
|
||||
s_conf);
|
||||
}
|
||||
|
||||
} // namespace ck_tile::builder::test
|
||||
|
||||
@@ -134,4 +134,26 @@ template <auto SIGNATURE>
|
||||
return detail::run(conv, args, inputs.input, outputs.weight, inputs.output);
|
||||
}
|
||||
|
||||
/// @brief Concept for checking whether this is the reference convolution
|
||||
/// backward data implementation.
|
||||
template <typename Conv, auto SIGNATURE>
|
||||
concept RefConvBwdDataInstance =
|
||||
// The pointer types (void*, const void*, const void*) appear to line up with
// the (input, weight, output) arguments that the backward data run() overload
// forwards to detail::run: only the input tensor is mutable.
detail::RefConvInstance<Conv, SIGNATURE, void*, const void*, const void*> &&
|
||||
ConvDirectionIsBackwardData<SIGNATURE>;
|
||||
|
||||
/// @brief `run()` specialization for the reference backward data implementation.
|
||||
///
|
||||
/// @tparam SIGNATURE The signature of the operation to perform. Must be backwards data.
|
||||
/// @returns RunResult about how the operation completed (or not).
|
||||
///
|
||||
/// @see run()
|
||||
template <auto SIGNATURE>
|
||||
[[nodiscard]] RunResult run(RefConvBwdDataInstance<SIGNATURE> auto& conv,
|
||||
const Args<SIGNATURE>& args,
|
||||
const Inputs<SIGNATURE>& inputs,
|
||||
const Outputs<SIGNATURE>& outputs)
|
||||
{
|
||||
// Forwards the tensors in (input, weight, output) order; for backward data
// the `input` tensor is taken from `outputs`, the other two from `inputs`.
return detail::run(conv, args, outputs.input, inputs.weight, inputs.output);
|
||||
}
|
||||
|
||||
} // namespace ck_tile::builder::test
|
||||
|
||||
@@ -31,6 +31,11 @@ if(GPU_TARGETS MATCHES "gfx9")
|
||||
add_instance_library(device_grouped_conv_bwd_weight_tile_instances ${GROUPED_CONV_BWD_WEIGHT_TILE})
|
||||
target_include_directories(device_grouped_conv_bwd_weight_tile_instances PRIVATE
|
||||
"${PROJECT_SOURCE_DIR}/experimental/builder/test/utils")
|
||||
|
||||
target_compile_options(device_grouped_conv_bwd_weight_tile_instances PRIVATE -DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=0)
|
||||
|
||||
file(GLOB_RECURSE GROUPED_CONV_BWD_DATA_TILE "${CMAKE_CURRENT_BINARY_DIR}/backward_data/*.cpp")
|
||||
add_instance_library(device_grouped_conv_bwd_data_tile_instances ${GROUPED_CONV_BWD_DATA_TILE})
|
||||
target_include_directories(device_grouped_conv_bwd_data_tile_instances PRIVATE
|
||||
"${PROJECT_SOURCE_DIR}/experimental/builder/test/utils")
|
||||
target_compile_options(device_grouped_conv_bwd_data_tile_instances PRIVATE -DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=0)
|
||||
endif()
|
||||
|
||||
@@ -1,82 +1,82 @@
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
|
||||
|
||||
@@ -1,82 +1,82 @@
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
|
||||
|
||||
@@ -1,70 +1,70 @@
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
|
||||
|
||||
@@ -1,82 +1,82 @@
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
|
||||
|
||||
@@ -1,82 +1,82 @@
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Default, 16, 16, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,2,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
|
||||
|
||||
@@ -1,70 +1,70 @@
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 1, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Default, 32, 32, 2, 4, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Default, 32, 32, 2, 1, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 1, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 1, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 256, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 4, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 4, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 32, 64, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 1, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 4, 1, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 1, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Default, 32, 32, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Default, 16, 16, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,1,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,false,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,256,32,8,8,32,32,2,4,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,64,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,128,32,8,8,32,32,4,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,64,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,64,32,8,8,32,32,2,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,64,32,8,8,32,32,2,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,32,32,8,8,32,32,2,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,32,64,32,8,8,32,32,1,2,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,4,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,32,8,8,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,2,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<3,NDHWGK,GKZYXC,EmptyTuple,NDHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,bf16,bf16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,bf16,bf16,fp32,EmptyTuple,bf16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,bf16,bf16,1,1>
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 8, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Default, 32, 32, 1, 1, 16, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 64, 16, 16, Filter1x1Stride1Pad0, 32, 32, 1, 1, 16, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 64, 16, 16, Filter1x1Stride1Pad0, 16, 16, 1, 1, 16, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,32,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,32,1,8),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,2,1),Seq(0,2,1),Seq(0,2,1),1,8,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,8,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,8,1,32),1,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,64,16,16,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),8,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp16,fp16,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp16,fp16,fp32,EmptyTuple,fp16,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,64,16,16,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,16,4,true,Seq(4,4,16),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp16,fp16,1,1>
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 256, 128, 32, 8, 8, Default, 32, 32, 4, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 64, 128, 32, 8, 8, Default, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 32, 128, 32, 8, 8, Default, 32, 32, 1, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<128, 128, 32, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 2, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 128, 32, 8, 8, Filter1x1Stride1Pad0, 32, 32, 1, 2, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 16, 64, 32, 8, 8, Default, 16, 16, 1, 4, 4, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Default, 16, 16, 4, 1, 1, 4, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<64, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 4, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Default, 32, 32, 1, 1, 4, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Default, 16, 16, 1, 1, 8, 2, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 128, 32, 16, 4, 4, Filter1x1Stride1Pad0, 32, 32, 1, 1, 4, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 32, 8, 8, Filter1x1Stride1Pad0, 16, 16, 1, 1, 8, 8, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1<256, 64, 16, 16, 4, 4, Filter1x1Stride1Pad0, 16, 16, 1, 1, 4, 1, 1, 1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,256,128,32,8,8,32,32,4,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,64,128,32,8,8,32,32,2,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,128,32,128,32,8,8,32,32,1,2,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,16,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,128,128,32,32,8,8,32,32,2,1,Seq(4,32,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,1),Seq(0,2,1),Seq(0,2,1),1,1,4,true,1,1,Seq(1,32,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,128,32,8,8,32,32,2,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,128,32,8,8,32,32,1,2,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,32,1),Seq(0,2,1),Seq(0,2,1),1,4,4,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,16,64,32,8,8,16,16,1,4,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,1,8,true,Seq(4,4,1),Seq(0,2,1),Seq(0,2,1),1,4,8,true,1,1,Seq(1,16,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,64,64,16,32,8,8,16,16,4,1,Seq(4,16,1),Seq(1,0,2),Seq(1,0,2),2,4,8,true,Seq(4,16,1),Seq(0,2,1),Seq(0,2,1),1,1,8,true,1,1,Seq(1,16,1,4),1,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,8,4),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,16,1,16),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Default,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,8,8),Seq(0,2,1),Seq(0,2,1),1,2,1,true,1,1,Seq(1,32,1,8),2,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,128,32,16,4,4,32,32,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,4,4),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,32,1,8),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,32,8,8,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,8,4,true,Seq(4,2,8),Seq(0,2,1),Seq(0,2,1),1,8,1,true,1,1,Seq(1,64,1,4),4,1,Default,fp32,fp32,1,1>
|
||||
DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<2,NHWGK,GKYXC,EmptyTuple,NHWGC,fp32,fp32,fp32,EmptyTuple,fp32,PassThrough,PassThrough,PassThrough,Filter1x1Stride1Pad0,1,1,1,256,64,16,16,4,4,16,16,1,1,Seq(4,64,1),Seq(1,0,2),Seq(1,0,2),2,4,4,true,Seq(4,16,4),Seq(0,2,1),Seq(0,2,1),1,1,1,true,1,1,Seq(1,16,1,16),1,1,Default,fp32,fp32,1,1>
|
||||
|
||||
@@ -144,6 +144,7 @@ def copy_includes(instances_path):
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy(f"{inc_dir}/include/instance_includes.inc", instances_path)
|
||||
shutil.copy(f"{inc_dir}/include/instance_run.inc", instances_path)
|
||||
shutil.copy(f"{inc_dir}/include/signatures.hpp", instances_path)
|
||||
|
||||
def generate_calls_inc(instances, problem_name, direction, filter_pattern):
|
||||
generate_dir = Path(__file__).resolve().parent
|
||||
@@ -467,8 +468,131 @@ def parse_bwd_weight_instances(instances, problem_name):
|
||||
|
||||
def parse_bwd_data_instances(instances, problem_name):
    """Translate old-CK backward-data instance strings into CK Tile template parameters.

    Every entry is expected to be a
    ``DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle<...>`` string carrying
    exactly 51 comma-separated template arguments. Entries containing ``#`` or
    ``;`` are ignored. Instances rejected by ``check_vectors`` are skipped with
    a message on stdout.

    Args:
        instances: Iterable of instance strings.
        problem_name: Forwarded to ``get_dtype`` to pick the data type used for
            the MFMA K-size lookup.

    Returns:
        A list with one ``ConvInstanceTemplateParams`` per accepted instance,
        in input order. ``instance_id`` is the position in ``instances``.

    Raises:
        RuntimeError: If an instance is not an ``Xdl_CShuffle<`` instance, does
            not have 51 arguments, has ``ak1 != bk1``, or names an unsupported
            pipeline scheduler.
        ValueError: If an instance string contains no ``<`` or ``>``.
    """
    convs = []

    for instance_id, instance in enumerate(instances):
        # Entries carrying '#' or ';' are not instance strings; ignore them.
        if "#" in instance or ";" in instance:
            continue

        # Everything between the first '<' and the last '>' is the argument list.
        start = instance.index('<') + 1
        end = instance.rindex('>')
        args = parse_instance_string(instance[start:end])

        # Only the plain "Xdl_CShuffle<" device op is understood here.
        if "Xdl_CShuffle<" not in instance:
            raise RuntimeError(f"Only V1 XDL CShuffle instances are supported for backward data. Found instance: {instance}")
        if len(args) != 51:
            raise RuntimeError(f"Wrong number of parameters in the V1 XDL CShuffle instance string: {instance}\n" +
                               f"Expected 51 parameters for V1 instance. Found {len(args)} parameters.")

        # Positional template arguments of the old-CK device op.
        spec = args[13]
        block_size = int(args[17])
        m_per_block = int(args[18])
        n_per_block = int(args[19])
        k_per_block = int(args[20])
        ak1 = int(args[21])
        bk1 = int(args[22])
        m_per_xdl = int(args[23])
        n_per_xdl = int(args[24])
        m_xdl_per_wave = int(args[25])
        n_xdl_per_wave = int(args[26])
        a_scalar_per_vector = int(args[31])
        b_scalar_per_vector = int(args[38])
        c_scalar_per_vector = int(args[44])

        if ak1 != bk1:
            raise RuntimeError(f"Not supported instance {instance_id} since ak1 != bk1. ak1: {ak1}, bk1: {bk1} in instance: {instance}")

        # ak1 == bk1 is guaranteed by the check above, so either one is "the" K1.
        k1 = ak1

        # TODO: Do we need split image for 3D bwd data convs?
        split_image = False

        # Default optimization parameters
        num_groups_to_merge = 1
        is_two_stage_instance = False
        is_explicit_gemm = False
        num_wave_groups = 1
        direct_load = False

        # Block GEMM pipeline parameters. "Default" is mapped to "Intrawave".
        block_gemm_pipeline_scheduler = args[46]
        if block_gemm_pipeline_scheduler == "Default":
            block_gemm_pipeline_scheduler = "Intrawave"

        # The pipeline version is fixed to v1 for both schedulers.
        blk_gemm_pipeline_version = "v1"

        # Sanity check for Block GEMM pipeline parameters.
        # Scheduler must be either Intrawave or Interwave.
        # Version must be from v1 to v5.
        if block_gemm_pipeline_scheduler not in ["Intrawave", "Interwave"]:
            raise RuntimeError(f"Invalid Block GEMM pipeline scheduler: {block_gemm_pipeline_scheduler} in instance: {instance}")
        if blk_gemm_pipeline_version not in ["v1", "v2", "v3", "v4", "v5"]:
            raise RuntimeError(f"Invalid Block GEMM pipeline version: {blk_gemm_pipeline_version} in instance: {instance}")

        double_smem_buffer = blk_gemm_pipeline_version == "v4"
        scheduler = block_gemm_pipeline_scheduler
        pipeline_version = blk_gemm_pipeline_version.upper()

        # Old CK pipeline version V5 maps to V6 for CK Tile
        if pipeline_version == "V5":
            pipeline_version = "V6"

        # With direct_load hard-coded to False above this branch is inert today;
        # it is kept so that flipping the default selects the async pipelines.
        if direct_load:
            if pipeline_version == "V1":
                pipeline_version = "ASYNC_V1"
            elif pipeline_version == "V4":
                pipeline_version = "ASYNC_V4"
            else:
                raise RuntimeError(
                    f"Not supported pipeline for direct load: pipeline_version={pipeline_version} in instance: {instance}"
                )

        # Warp layout: how many warps tile the block along M and N; whatever is
        # left of the block's warps (block_size / warp_size) goes to K.
        m_warp = m_per_block // (m_per_xdl * m_xdl_per_wave)
        n_warp = n_per_block // (n_per_xdl * n_xdl_per_wave)
        warp_size = 64
        k_warp = block_size // (warp_size * m_warp * n_warp)
        dtype = get_dtype(problem_name)

        k_per_xdl = max(k1, get_k_mfma(dtype, m_per_xdl, n_per_xdl))

        # Explicit comparison retained: check_vectors is defined elsewhere in
        # this file and only a literal False result means "skip".
        if check_vectors(a_scalar_per_vector, b_scalar_per_vector, c_scalar_per_vector) == False:
            print(f"Skipping instance {instance_id} with irregular load since it's not supported yet.")
            continue
        if pipeline_version == "V6":
            print(f"Skipping instance {instance_id} with V6 since it's not supported yet.")
            continue

        # Check vector sizes for A and B tensors - we cannot oversubscribe.
        num_tile_elements_a = m_per_xdl * k_per_xdl
        num_tile_elements_b = n_per_xdl * k_per_xdl
        max_vector_size_a = max(1, num_tile_elements_a // block_size)
        max_vector_size_b = max(1, num_tile_elements_b // block_size)
        a_scalar_per_vector = min(a_scalar_per_vector, max_vector_size_a)
        b_scalar_per_vector = min(b_scalar_per_vector, max_vector_size_b)

        convs.append(
            ConvInstanceTemplateParams(
                spec,
                [m_per_block, n_per_block, k_per_block],
                [m_warp, n_warp, k_warp],
                [m_per_xdl, n_per_xdl, k_per_xdl],
                double_smem_buffer,
                num_wave_groups,
                is_two_stage_instance,
                pipeline_version,
                scheduler,
                [a_scalar_per_vector, b_scalar_per_vector, c_scalar_per_vector],
                num_groups_to_merge,
                split_image,
                is_explicit_gemm,
                instance_id,
            )
        )

    return convs
|
||||
|
||||
def generate_instances_fwd(instances, problem_name, config, filter_pattern, instances_path):
|
||||
|
||||
@@ -1,177 +1,8 @@
|
||||
#include "../../builder/test/utils/ckb_conv_tile_test_configs.hpp"
|
||||
#include "ck_tile/builder/testing/conv/fwd.hpp"
|
||||
#include "ck_tile/builder/testing/conv/bwd_weight.hpp"
|
||||
#include "ck_tile/builder/testing/conv/ck_tile.hpp"
|
||||
#include "ck_tile/builder/testing/conv/bwd_data.hpp"
|
||||
#include "signatures.hpp"
|
||||
|
||||
namespace ckb = ck_tile::builder;
|
||||
namespace ckt = ck_tile::builder::test;
|
||||
namespace cku = ck_tile::builder::test_utils;
|
||||
namespace ckf = ck_tile::builder::factory;
|
||||
|
||||
namespace ck_tile::builder::profiling {
|
||||
|
||||
// Signature for a 2D forward convolution: FP32 data, FP32 accumulation,
// NHWGC input / GKYXC weight / NHWGK output layouts.
constexpr auto SIGNATURE_NHWGC_FP32_FWD =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::FORWARD,
|
||||
.data_type = ckb::DataType::FP32,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
// Signature for a 2D forward convolution: BF16 data, FP32 accumulation,
// NHWGC input / GKYXC weight / NHWGK output layouts.
constexpr auto SIGNATURE_NHWGC_BF16_FWD =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::FORWARD,
|
||||
.data_type = ckb::DataType::BF16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
// Signature for a 2D forward convolution: FP16 data, FP32 accumulation,
// NHWGC input / GKYXC weight / NHWGK output layouts.
constexpr auto SIGNATURE_NHWGC_FP16_FWD =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::FORWARD,
|
||||
.data_type = ckb::DataType::FP16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
// Signature for a 3D forward convolution: FP32 data, FP32 accumulation,
// NDHWGC input / GKZYXC weight / NDHWGK output layouts.
constexpr auto SIGNATURE_NDHWGC_FP32_FWD =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::FORWARD,
|
||||
.data_type = ckb::DataType::FP32,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
// Signature for a 3D forward convolution: BF16 data, FP32 accumulation,
// NDHWGC input / GKZYXC weight / NDHWGK output layouts.
constexpr auto SIGNATURE_NDHWGC_BF16_FWD =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::FORWARD,
|
||||
.data_type = ckb::DataType::BF16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
// Signature for a 3D forward convolution: FP16 data, FP32 accumulation,
// NDHWGC input / GKZYXC weight / NDHWGK output layouts.
constexpr auto SIGNATURE_NDHWGC_FP16_FWD =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::FORWARD,
|
||||
.data_type = ckb::DataType::FP16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
// Backward Weight Signatures
|
||||
// Signature for a 2D backward-weight convolution: FP32 data, FP32 accumulation,
// NHWGC input / GKYXC weight / NHWGK output layouts.
constexpr auto SIGNATURE_NHWGC_FP32_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::FP32,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
// Signature for a 2D backward-weight convolution: BF16 data, FP32 accumulation,
// NHWGC input / GKYXC weight / NHWGK output layouts.
constexpr auto SIGNATURE_NHWGC_BF16_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::BF16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
// Signature for a 2D backward-weight convolution: FP16 data, FP32 accumulation,
// NHWGC input / GKYXC weight / NHWGK output layouts.
constexpr auto SIGNATURE_NHWGC_FP16_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::FP16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
// Signature for a 3D backward-weight convolution: FP32 data, FP32 accumulation,
// NDHWGC input / GKZYXC weight / NDHWGK output layouts.
constexpr auto SIGNATURE_NDHWGC_FP32_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::FP32,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
// Signature for a 3D backward-weight convolution: BF16 data, FP32 accumulation,
// NDHWGC input / GKZYXC weight / NDHWGK output layouts.
constexpr auto SIGNATURE_NDHWGC_BF16_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::BF16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
// Signature for a 3D backward-weight convolution: FP16 data, FP32 accumulation,
// NDHWGC input / GKZYXC weight / NDHWGK output layouts.
constexpr auto SIGNATURE_NDHWGC_FP16_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::FP16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
// Backward Data Signatures
|
||||
constexpr auto SIGNATURE_NHWGC_FP32_BWD_DATA =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::BACKWARD_DATA,
|
||||
.data_type = ckb::DataType::FP32,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NHWGC_BF16_BWD_DATA =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::BACKWARD_DATA,
|
||||
.data_type = ckb::DataType::BF16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NHWGC_FP16_BWD_DATA =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::BACKWARD_DATA,
|
||||
.data_type = ckb::DataType::FP16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NDHWGC_FP32_BWD_DATA =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::BACKWARD_DATA,
|
||||
.data_type = ckb::DataType::FP32,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NDHWGC_BF16_BWD_DATA =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::BACKWARD_DATA,
|
||||
.data_type = ckb::DataType::BF16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NDHWGC_FP16_BWD_DATA =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::BACKWARD_DATA,
|
||||
.data_type = ckb::DataType::FP16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
} // namespace ck_tile::builder::profiling
|
||||
|
||||
@@ -0,0 +1,186 @@
|
||||
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <tuple>
|
||||
|
||||
#include "../../builder/test/impl/conv_signature_types.hpp"
|
||||
#include "ck_tile/builder/testing/conv/ck_tile.hpp"
|
||||
|
||||
namespace ck_tile::builder::profiling {
|
||||
|
||||
namespace ckb = ck_tile::builder;
|
||||
namespace ckt = ck_tile::builder::test;
|
||||
|
||||
/////////////////////////////////////////
// FWD signatures
/////////////////////////////////////////
// All signatures use FP32 accumulation. They are declared `inline constexpr` so that every
// translation unit including this header refers to the same object.

/// 2D forward, FP32, layouts NHWGC / GKYXC / NHWGK.
inline constexpr auto SIGNATURE_NHWGC_FP32_FWD =
    ckt::ConvSignature{.spatial_dim            = 2,
                       .direction              = ckb::ConvDirection::FORWARD,
                       .data_type              = ckb::DataType::FP32,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input                  = {.config = {.layout = ckb::TensorLayout::NHWGC}},
                       .weight                 = {.config = {.layout = ckb::TensorLayout::GKYXC}},
                       .output                 = {.config = {.layout = ckb::TensorLayout::NHWGK}}};

/// 2D forward, BF16, layouts NHWGC / GKYXC / NHWGK.
inline constexpr auto SIGNATURE_NHWGC_BF16_FWD =
    ckt::ConvSignature{.spatial_dim            = 2,
                       .direction              = ckb::ConvDirection::FORWARD,
                       .data_type              = ckb::DataType::BF16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input                  = {.config = {.layout = ckb::TensorLayout::NHWGC}},
                       .weight                 = {.config = {.layout = ckb::TensorLayout::GKYXC}},
                       .output                 = {.config = {.layout = ckb::TensorLayout::NHWGK}}};

/// 2D forward, FP16, layouts NHWGC / GKYXC / NHWGK.
inline constexpr auto SIGNATURE_NHWGC_FP16_FWD =
    ckt::ConvSignature{.spatial_dim            = 2,
                       .direction              = ckb::ConvDirection::FORWARD,
                       .data_type              = ckb::DataType::FP16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input                  = {.config = {.layout = ckb::TensorLayout::NHWGC}},
                       .weight                 = {.config = {.layout = ckb::TensorLayout::GKYXC}},
                       .output                 = {.config = {.layout = ckb::TensorLayout::NHWGK}}};

/// 3D forward, FP32, layouts NDHWGC / GKZYXC / NDHWGK.
inline constexpr auto SIGNATURE_NDHWGC_FP32_FWD =
    ckt::ConvSignature{.spatial_dim            = 3,
                       .direction              = ckb::ConvDirection::FORWARD,
                       .data_type              = ckb::DataType::FP32,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input                  = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
                       .weight                 = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
                       .output                 = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};

/// 3D forward, BF16, layouts NDHWGC / GKZYXC / NDHWGK.
inline constexpr auto SIGNATURE_NDHWGC_BF16_FWD =
    ckt::ConvSignature{.spatial_dim            = 3,
                       .direction              = ckb::ConvDirection::FORWARD,
                       .data_type              = ckb::DataType::BF16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input                  = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
                       .weight                 = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
                       .output                 = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};

/// 3D forward, FP16, layouts NDHWGC / GKZYXC / NDHWGK.
inline constexpr auto SIGNATURE_NDHWGC_FP16_FWD =
    ckt::ConvSignature{.spatial_dim            = 3,
                       .direction              = ckb::ConvDirection::FORWARD,
                       .data_type              = ckb::DataType::FP16,
                       .accumulation_data_type = ckb::DataType::FP32,
                       .input                  = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
                       .weight                 = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
                       .output                 = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
/////////////////////////////////////////
|
||||
// BWD WEIGHT signatures
|
||||
//////////////////////////////////////////
|
||||
|
||||
constexpr auto SIGNATURE_NHWGC_BF16_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::BF16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NHWGC_FP16_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::FP16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NHWGC_FP32_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::FP32,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NDHWGC_BF16_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::BF16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NDHWGC_FP16_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::FP16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NDHWGC_FP32_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::FP32,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
/////////////////////////////////////////
|
||||
// BWD DATA signatures
|
||||
//////////////////////////////////////////
|
||||
|
||||
constexpr auto SIGNATURE_NHWGC_BF16_BWD_DATA =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::BACKWARD_DATA,
|
||||
.data_type = ckb::DataType::BF16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NHWGC_FP16_BWD_DATA =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::BACKWARD_DATA,
|
||||
.data_type = ckb::DataType::FP16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NHWGC_FP32_BWD_DATA =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::BACKWARD_DATA,
|
||||
.data_type = ckb::DataType::FP32,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NDHWGC_BF16_BWD_DATA =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::BACKWARD_DATA,
|
||||
.data_type = ckb::DataType::BF16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NDHWGC_FP16_BWD_DATA =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::BACKWARD_DATA,
|
||||
.data_type = ckb::DataType::FP16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NDHWGC_FP32_BWD_DATA =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::BACKWARD_DATA,
|
||||
.data_type = ckb::DataType::FP32,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
} // namespace ck_tile::builder::profiling
|
||||
@@ -50,6 +50,7 @@ def compile_single_file(cpp_file: Path, project_root: Path, gpu_target: str, ver
|
||||
"-D__HIP_PLATFORM_AMD__",
|
||||
"-D CK_EXPERIMENTAL_BUILDER=ON",
|
||||
"-O3",
|
||||
"-Wno-unknown-warning-option",
|
||||
*include_flags,
|
||||
str(cpp_file),
|
||||
"-o", str(output_file)
|
||||
@@ -63,10 +64,15 @@ def compile_single_file(cpp_file: Path, project_root: Path, gpu_target: str, ver
|
||||
timeout=300 # 5 minute timeout per file
|
||||
)
|
||||
|
||||
print(f"\n\n Command: {' '.join(cmd)}\n") if verbose else None
|
||||
|
||||
if result.returncode == 0:
|
||||
return True, ""
|
||||
else:
|
||||
# Extract the key error message
|
||||
if verbose and result.stderr:
|
||||
print(f" {result.stderr}")
|
||||
print()
|
||||
error_output = result.stderr
|
||||
return False, error_output
|
||||
|
||||
|
||||
@@ -634,22 +634,40 @@ struct TransformConvBwdDataToGemm
|
||||
constexpr auto CStride = I1;
|
||||
|
||||
// TODO Add support for NumGroupsToMerge > 1
|
||||
return make_naive_tensor_descriptor(
|
||||
make_tuple(N_, Di_, Hi_, Wi_, C_),
|
||||
make_tuple(NStride, DiStride, HiStride, WiStride, CStride),
|
||||
number<VectorSizeC>{},
|
||||
I1);
|
||||
if constexpr(ConvSpec == ConvolutionSpecialization::Filter1x1Stride1Pad0)
|
||||
{
|
||||
return make_naive_tensor_descriptor(make_tuple(N_ * Di_ * Hi_ * Wi_, C_),
|
||||
make_tuple(WiStride, CStride),
|
||||
number<VectorSizeC>{},
|
||||
I1);
|
||||
}
|
||||
else
|
||||
{
|
||||
return make_naive_tensor_descriptor(
|
||||
make_tuple(N_, Di_, Hi_, Wi_, C_),
|
||||
make_tuple(NStride, DiStride, HiStride, WiStride, CStride),
|
||||
number<VectorSizeC>{},
|
||||
I1);
|
||||
}
|
||||
}
|
||||
|
||||
template <index_t NDim = NDimSpatial, typename std::enable_if<NDim == 3, bool>::type = false>
|
||||
CK_TILE_HOST auto make_wei_grid_desc() const
|
||||
{
|
||||
// GKZYXC
|
||||
return make_naive_tensor_descriptor(
|
||||
make_tuple(K_, Z_, Y_, X_, C_),
|
||||
make_tuple(C_ * X_ * Y_ * Z_, C_ * X_ * Y_, C_ * X_, C_, I1),
|
||||
number<VectorSizeB>{},
|
||||
I1);
|
||||
if constexpr(ConvSpec == ConvolutionSpecialization::Filter1x1Stride1Pad0)
|
||||
{
|
||||
return make_naive_tensor_descriptor(
|
||||
make_tuple(K_, C_), make_tuple(C_, I1), number<VectorSizeB>{}, I1);
|
||||
}
|
||||
else
|
||||
{
|
||||
return make_naive_tensor_descriptor(
|
||||
make_tuple(K_, Z_, Y_, X_, C_),
|
||||
make_tuple(C_ * X_ * Y_ * Z_, C_ * X_ * Y_, C_ * X_, C_, I1),
|
||||
number<VectorSizeB>{},
|
||||
I1);
|
||||
}
|
||||
}
|
||||
// TODO: implement ck_tile::tensor_layout::convolution that describes packed/strided dimensions as
|
||||
// properties
|
||||
|
||||
@@ -0,0 +1,204 @@
|
||||
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <tuple>
|
||||
|
||||
#include "../../experimental/builder/test/utils/conv_algorithm_type_utils.hpp"
|
||||
#include "grouped_convolution_signatures.hpp"
|
||||
#include "ck_tile/ref/naive_grouped_conv_bwd_data_gpu.hpp"
|
||||
|
||||
#include "ck_tile/builder/testing/filter_extent.hpp"
|
||||
#include "ck_tile/builder/testing/conv/ck_tile.hpp"
|
||||
#include "ck_tile/builder/testing/conv/reference.hpp"
|
||||
#include "ck_tile/builder/conv_builder.hpp"
|
||||
#include "tile_profiler_utils.hpp"
|
||||
|
||||
namespace ck_tile::builder::profiling {
|
||||
|
||||
namespace ckb = ck_tile::builder;
|
||||
namespace ckt = ck_tile::builder::test;
|
||||
|
||||
#include "../../../experimental/grouped_convolution_tile_instances/instances/backward_data/grouped_convolution_backward_data_tile_ndhwgc_fp32.inc"
|
||||
#include "../../../experimental/grouped_convolution_tile_instances/instances/backward_data/grouped_convolution_backward_data_tile_nhwgc_fp32.inc"
|
||||
#include "../../../experimental/grouped_convolution_tile_instances/instances/backward_data/grouped_convolution_backward_data_tile_nhwgc_bf16.inc"
|
||||
#include "../../../experimental/grouped_convolution_tile_instances/instances/backward_data/grouped_convolution_backward_data_tile_nhwgc_fp16.inc"
|
||||
#include "../../../experimental/grouped_convolution_tile_instances/instances/backward_data/grouped_convolution_backward_data_tile_ndhwgc_bf16.inc"
|
||||
#include "../../../experimental/grouped_convolution_tile_instances/instances/backward_data/grouped_convolution_backward_data_tile_ndhwgc_fp16.inc"
|
||||
|
||||
template <auto SIGNATURE>
|
||||
void run_cpu_validation(const ckt::Args<SIGNATURE>& args,
|
||||
const ckt::Outputs<SIGNATURE>& outputs,
|
||||
const ckt::Outputs<SIGNATURE>& reference)
|
||||
{
|
||||
using DataType =
|
||||
std::conditional_t<SIGNATURE.data_type == ckb::DataType::FP32,
|
||||
float,
|
||||
std::conditional_t<SIGNATURE.data_type == ckb::DataType::FP16,
|
||||
ck_tile::half_t,
|
||||
ck_tile::bfloat16_t>>;
|
||||
const auto conv_param = args.to_ck_tile_conv_param();
|
||||
|
||||
const std::size_t input_bytes_num = conv_param.template GetInputByte<DataType>();
|
||||
std::vector<DataType> in(input_bytes_num / sizeof(DataType));
|
||||
std::vector<DataType> ref(input_bytes_num / sizeof(DataType));
|
||||
HIP_CHECK_ERROR(
|
||||
hipMemcpy(&ref.data()[0], reference.input, input_bytes_num, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK_ERROR(
|
||||
hipMemcpy(&in.data()[0], outputs.input, input_bytes_num, hipMemcpyDeviceToHost));
|
||||
ck_tile::check_err(in, ref, "\tError: Incorrect results!");
|
||||
}
|
||||
|
||||
/// @brief `run_grouped_conv_backward_data_tile_algs()` runs all grouped conv bwd data instances.
///
/// The reference implementation is run once. Afterwards every generated instance (or only the
/// one selected by `instance_index`) is run for every requested split-K value, and its result is
/// validated against the reference. The fastest valid run is reported.
///
/// @tparam SIGNATURE Backward data convolution signature.
///
/// @param args           Convolution problem description.
/// @param split_k        Split-K selection forwarded to `get_split_k_values()`. The auto-deduce
///                       value -1 is removed from the resulting list.
/// @param instance_index Zero-based index of the only instance to run, or -1 to run all of them.
/// @param inputs         Device input buffers.
/// @param outputs        Device buffers the instances write their result to.
/// @param s_conf         Stream configuration passed to every instance.
///
/// @return Tuple of (all validated runs were correct, best average time in ms, name of the best
///         instance, split-K of the best run, index of the best instance). When no run was valid
///         the time is `std::numeric_limits<float>::max()`, the name is empty, the split-K is 0
///         and the index is -1. For an unknown signature the first element is `false`.
template <auto SIGNATURE>
std::tuple<bool, float, std::string, int, int>
run_grouped_conv_backward_data_tile_algs(const ckt::Args<SIGNATURE>& args,
                                         const std::string& split_k,
                                         const index_t instance_index,
                                         const ckt::Inputs<SIGNATURE>& inputs,
                                         const ckt::Outputs<SIGNATURE>& outputs,
                                         const ck_tile::stream_config& s_conf)
{
    // NOTE: the local names below are kept as-is because the `*_calls.inc` files included further
    // down are expanded inside this function and may refer to them.
    float best_avg_time = std::numeric_limits<float>::max();
    std::string best_op_name, op_name;
    int best_split_k                = 0;
    ck::index_t best_instance_index = -1;
    bool is_supported               = false;
    float avg_time                  = 0.f;
    bool all_instances_valid        = true;

    using DataType =
        std::conditional_t<SIGNATURE.data_type == ckb::DataType::FP32,
                           float,
                           std::conditional_t<SIGNATURE.data_type == ckb::DataType::FP16,
                                              ck_tile::half_t,
                                              ck_tile::bfloat16_t>>;

    // Compute the reference result once; every instance is validated against it.
    auto reference = ckt::alloc_outputs(args);
    using ReferenceInstance =
        typename ckb::ConvBuilder<SIGNATURE, ckt::ConvAlgorithm_Reference{}>::Instance;
    auto ref_conv                    = ReferenceInstance{};
    [[maybe_unused]] auto ref_result = ckt::run(ref_conv, args, inputs, reference.get());

    const auto conv_param = args.to_ck_tile_conv_param();

    // Get the maximum value of the reference result; it is used to derive the absolute tolerance.
    const std::size_t input_bytes_num = conv_param.template GetInputByte<DataType>();
    std::vector<DataType> ref(input_bytes_num / sizeof(DataType));
    HIP_CHECK_ERROR(
        hipMemcpy(ref.data(), reference.get().input, input_bytes_num, hipMemcpyDeviceToHost));
    const float max_accumulated_value = *std::max_element(ref.begin(), ref.end());

    const index_t num_accums = conv_param.K_;

    // BWD data doesn't support split-K autodeduce value -1
    auto split_k_values = get_split_k_values(split_k);
    split_k_values.erase(std::remove(split_k_values.begin(), split_k_values.end(), -1),
                         split_k_values.end());

    index_t num_kernel = 0;
    auto run_alg       = [&](auto&& run_alg_func) {
        num_kernel++;
        // Skip if a specific instance was requested and this isn't it
        const bool running_specific_instance = (instance_index != -1);
        const bool current_is_target         = (num_kernel - 1 == instance_index);
        if(running_specific_instance && !current_is_target)
        {
            return;
        }

        for(const int k_batch : split_k_values)
        {
            ckt::Args<SIGNATURE> args_k_batch = args;
            args_k_batch.k_batch              = k_batch;
            std::tie(is_supported, avg_time, op_name) =
                run_alg_func(args_k_batch, inputs, outputs, s_conf);

            if(!is_supported)
            {
                std::cout << "[Not supported] " << op_name << ", SplitK " << k_batch << std::endl;
                continue;
            }

            ckt::ValidationReport report;
            auto&& [rtol, atol] =
                get_rtol_atol<SIGNATURE>(num_accums, k_batch, max_accumulated_value);
            ckt::Outputs<SIGNATURE>::reflect(
                args_k_batch,
                [&](std::string_view name,
                    const auto& desc,
                    void* ckt::Outputs<SIGNATURE>::*ptr) {
                    report.check(name, desc, outputs.*ptr, reference.get().*ptr, rtol, atol);
                });

            const bool valid = report.get_errors().empty();
            if(valid)
            {
                // Update all "best" fields together so that they always describe the same run,
                // also when two runs report exactly the same time.
                if(avg_time < best_avg_time)
                {
                    best_avg_time       = avg_time;
                    best_op_name        = op_name;
                    best_split_k        = k_batch;
                    best_instance_index = num_kernel - 1;
                }
                std::cout << "[Valid] Perf: " << std::setw(10) << avg_time << " ms," << " "
                          << op_name << " (instance " << num_kernel - 1 << "), SplitK "
                          << k_batch << std::endl;
            }
            else
            {
                std::cout << "[Error] " << op_name << ", SplitK " << k_batch << std::endl;
                for(const auto& error : report.get_errors())
                {
                    std::cout << "\tNumber of incorrect values: " << error.wrong_elements
                              << " Is all zero:" << error.is_all_zero()
                              << " max err: " << error.max_error << std::endl;
                }
                // Re-check on the host to print the mismatching values. The check always looks
                // at the same tensor, so it is run once rather than once per reported error.
                run_cpu_validation<SIGNATURE>(args_k_batch, outputs, reference.get());
                all_instances_valid = false;
            }
        }
    };

    if constexpr(SIGNATURE == SIGNATURE_NHWGC_FP16_BWD_DATA)
    {
#include "../../experimental/grouped_convolution_tile_instances/instances/backward_data/grouped_convolution_backward_data_tile_nhwgc_fp16_calls.inc"
    }
    else if constexpr(SIGNATURE == SIGNATURE_NHWGC_BF16_BWD_DATA)
    {
#include "../../experimental/grouped_convolution_tile_instances/instances/backward_data/grouped_convolution_backward_data_tile_nhwgc_bf16_calls.inc"
    }
    else if constexpr(SIGNATURE == SIGNATURE_NHWGC_FP32_BWD_DATA)
    {
#include "../../experimental/grouped_convolution_tile_instances/instances/backward_data/grouped_convolution_backward_data_tile_nhwgc_fp32_calls.inc"
    }
    else if constexpr(SIGNATURE == SIGNATURE_NDHWGC_FP16_BWD_DATA)
    {
#include "../../experimental/grouped_convolution_tile_instances/instances/backward_data/grouped_convolution_backward_data_tile_ndhwgc_fp16_calls.inc"
    }
    else if constexpr(SIGNATURE == SIGNATURE_NDHWGC_BF16_BWD_DATA)
    {
#include "../../experimental/grouped_convolution_tile_instances/instances/backward_data/grouped_convolution_backward_data_tile_ndhwgc_bf16_calls.inc"
    }
    else if constexpr(SIGNATURE == SIGNATURE_NDHWGC_FP32_BWD_DATA)
    {
#include "../../experimental/grouped_convolution_tile_instances/instances/backward_data/grouped_convolution_backward_data_tile_ndhwgc_fp32_calls.inc"
    }
    else
    {
        std::cout << "Signature not supported" << std::endl;
        return std::make_tuple(
            false, best_avg_time, best_op_name, best_split_k, best_instance_index);
    }
    return std::make_tuple(
        all_instances_valid, best_avg_time, best_op_name, best_split_k, best_instance_index);
}
|
||||
|
||||
} // namespace ck_tile::builder::profiling
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "ck_tile/builder/testing/conv/ck_tile.hpp"
|
||||
#include "ck_tile/builder/testing/conv/reference.hpp"
|
||||
#include "ck_tile/builder/conv_builder.hpp"
|
||||
#include "tile_profiler_utils.hpp"
|
||||
|
||||
namespace ck_tile::builder::profiling {
|
||||
|
||||
@@ -28,26 +29,6 @@ namespace ckt = ck_tile::builder::test;
|
||||
#include "../../../experimental/grouped_convolution_tile_instances/instances/backward_weight/grouped_convolution_backward_weight_tile_ndhwgc_bf16.inc"
|
||||
#include "../../../experimental/grouped_convolution_tile_instances/instances/backward_weight/grouped_convolution_backward_weight_tile_ndhwgc_fp16.inc"
|
||||
|
||||
std::vector<int> get_split_k_values(const std::string& split_k)
|
||||
{
|
||||
std::vector<int> split_k_list = {/*auto deduce value*/ -1, 1, 2, 4, 8, 16, 32, 64, 128};
|
||||
|
||||
if(split_k != "all")
|
||||
{
|
||||
try
|
||||
{
|
||||
int split_k_value = std::stoi(split_k);
|
||||
split_k_list = {split_k_value};
|
||||
}
|
||||
catch(const std::exception& e)
|
||||
{
|
||||
std::cerr << e.what() << '\n';
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
return split_k_list;
|
||||
}
|
||||
|
||||
template <auto SIGNATURE>
|
||||
void run_cpu_validation(const ckt::Args<SIGNATURE>& args,
|
||||
const ckt::Outputs<SIGNATURE>& outputs,
|
||||
@@ -71,36 +52,6 @@ void run_cpu_validation(const ckt::Args<SIGNATURE>& args,
|
||||
ck_tile::check_err(wei, ref, "\tError: Incorrect results!");
|
||||
}
|
||||
|
||||
template <auto SIGNATURE>
|
||||
std::tuple<double, double>
|
||||
get_rtol_atol(const int num_accums, const int k_batch, const float max_accumulated_value)
|
||||
{
|
||||
using WeiDataType =
|
||||
std::conditional_t<SIGNATURE.data_type == ckb::DataType::FP32,
|
||||
float,
|
||||
std::conditional_t<SIGNATURE.data_type == ckb::DataType::FP16,
|
||||
ck_tile::half_t,
|
||||
ck_tile::bfloat16_t>>;
|
||||
using ComputeType = WeiDataType;
|
||||
using AccDataType = float;
|
||||
|
||||
// Assign middle value of the range for auto deduce
|
||||
const int num_accums_split_k = k_batch > 0 ? k_batch : 64;
|
||||
auto rtol = ck_tile::get_relative_threshold<ComputeType, WeiDataType, AccDataType>(
|
||||
num_accums / num_accums_split_k);
|
||||
auto atol = ck_tile::get_absolute_threshold<ComputeType, WeiDataType, AccDataType>(
|
||||
max_accumulated_value / num_accums_split_k, num_accums / num_accums_split_k);
|
||||
// Calculate error due to split_k accumulation
|
||||
auto rtol_split_k =
|
||||
ck_tile::get_relative_threshold<WeiDataType, WeiDataType, WeiDataType>(num_accums_split_k);
|
||||
auto atol_split_k = ck_tile::get_absolute_threshold<WeiDataType, WeiDataType, WeiDataType>(
|
||||
max_accumulated_value, num_accums_split_k);
|
||||
// Use higher threshold
|
||||
rtol = std::max(rtol, rtol_split_k);
|
||||
atol = std::max(atol, atol_split_k);
|
||||
return std::make_tuple(rtol, atol);
|
||||
}
|
||||
|
||||
/// @brief `run_grouped_conv_backward_weight_tile_algs()` runs all grouped conv bwd weight instances.
|
||||
///
|
||||
/// @tparam SIGNATURE Backward weight convolution signature.
|
||||
|
||||
@@ -5,124 +5,5 @@
|
||||
|
||||
#include <tuple>
|
||||
|
||||
#include "../../experimental/builder/test/impl/conv_signature_types.hpp"
|
||||
#include "../../experimental/grouped_convolution_tile_instances/include/signatures.hpp"
|
||||
#include "ck_tile/builder/testing/conv/ck_tile.hpp"
|
||||
|
||||
namespace ck_tile::builder::profiling {
|
||||
|
||||
namespace ckb = ck_tile::builder;
|
||||
namespace ckt = ck_tile::builder::test;
|
||||
|
||||
constexpr auto SIGNATURE_NHWGC_FP32_FWD =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::FORWARD,
|
||||
.data_type = ckb::DataType::FP32,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NHWGC_BF16_FWD =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::FORWARD,
|
||||
.data_type = ckb::DataType::BF16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NHWGC_FP16_FWD =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::FORWARD,
|
||||
.data_type = ckb::DataType::FP16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NDHWGC_FP32_FWD =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::FORWARD,
|
||||
.data_type = ckb::DataType::FP32,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NDHWGC_BF16_FWD =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::FORWARD,
|
||||
.data_type = ckb::DataType::BF16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NDHWGC_FP16_FWD =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::FORWARD,
|
||||
.data_type = ckb::DataType::FP16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
/////////////////////////////////////////
|
||||
// BWD WEIGHT signatures
|
||||
//////////////////////////////////////////
|
||||
|
||||
constexpr auto SIGNATURE_NHWGC_BF16_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::BF16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NHWGC_FP16_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::FP16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NHWGC_FP32_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 2,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::FP32,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NDHWGC_BF16_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::BF16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NDHWGC_FP16_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::FP16,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
constexpr auto SIGNATURE_NDHWGC_FP32_BWD_WEIGHT =
|
||||
ckt::ConvSignature{.spatial_dim = 3,
|
||||
.direction = ckb::ConvDirection::BACKWARD_WEIGHT,
|
||||
.data_type = ckb::DataType::FP32,
|
||||
.accumulation_data_type = ckb::DataType::FP32,
|
||||
.input = {.config = {.layout = ckb::TensorLayout::NDHWGC}},
|
||||
.weight = {.config = {.layout = ckb::TensorLayout::GKZYXC}},
|
||||
.output = {.config = {.layout = ckb::TensorLayout::NDHWGK}}};
|
||||
|
||||
} // namespace ck_tile::builder::profiling
|
||||
|
||||
@@ -4,14 +4,70 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include "../../experimental/builder/test/utils/conv_algorithm_type_utils.hpp"
|
||||
|
||||
namespace ck_tile::builder::profiling {
|
||||
|
||||
namespace ckt = ck_tile::builder::test;
|
||||
|
||||
/// Builds the list of split-K values the profiler should try.
///
/// @param split_k Either the literal "all" or a decimal integer.
/// @return For "all": the auto-deduce sentinel (-1) followed by 1, 2, 4, ..., 128.
///         Otherwise a single-element list holding the parsed integer.
///
/// If split_k is neither "all" nor a complete integer, the parse error is written to
/// std::cerr and the process exits with EXIT_FAILURE.
inline std::vector<int> get_split_k_values(const std::string& split_k)
{
    if(split_k == "all")
    {
        return {/*auto deduce value*/ -1, 1, 2, 4, 8, 16, 32, 64, 128};
    }

    try
    {
        std::size_t parsed_chars = 0;
        const int split_k_value  = std::stoi(split_k, &parsed_chars);

        // std::stoi stops at the first non-numeric character, so an input such as "8x"
        // would otherwise be silently accepted as 8.
        if(parsed_chars != split_k.size())
        {
            throw std::invalid_argument("invalid split-K value: '" + split_k + "'");
        }
        return {split_k_value};
    }
    catch(const std::exception& e)
    {
        std::cerr << e.what() << '\n';
        std::exit(EXIT_FAILURE);
    }
}
|
||||
|
||||
template <auto SIGNATURE>
|
||||
auto parse_conv_args(int arg_idx, char* const argv[])
|
||||
inline std::tuple<double, double>
|
||||
get_rtol_atol(const int num_accums, const int k_batch, const float max_accumulated_value)
|
||||
{
|
||||
using DataType =
|
||||
std::conditional_t<SIGNATURE.data_type == ckb::DataType::FP32,
|
||||
float,
|
||||
std::conditional_t<SIGNATURE.data_type == ckb::DataType::FP16,
|
||||
ck_tile::half_t,
|
||||
ck_tile::bfloat16_t>>;
|
||||
using ComputeType = DataType;
|
||||
using AccDataType = float;
|
||||
|
||||
// Assign middle value of the range for auto deduce
|
||||
const int num_accums_split_k = k_batch > 0 ? k_batch : 64;
|
||||
auto rtol = ck_tile::get_relative_threshold<ComputeType, DataType, AccDataType>(
|
||||
num_accums / num_accums_split_k);
|
||||
auto atol = ck_tile::get_absolute_threshold<ComputeType, DataType, AccDataType>(
|
||||
max_accumulated_value / num_accums_split_k, num_accums / num_accums_split_k);
|
||||
// Calculate error due to split_k accumulation
|
||||
auto rtol_split_k =
|
||||
ck_tile::get_relative_threshold<DataType, DataType, DataType>(num_accums_split_k);
|
||||
auto atol_split_k = ck_tile::get_absolute_threshold<DataType, DataType, DataType>(
|
||||
max_accumulated_value, num_accums_split_k);
|
||||
// Use higher threshold
|
||||
rtol = std::max(rtol, rtol_split_k);
|
||||
atol = std::max(atol, atol_split_k);
|
||||
return std::make_tuple(rtol, atol);
|
||||
}
|
||||
|
||||
template <auto SIGNATURE>
|
||||
inline ckt::Args<SIGNATURE> parse_conv_args(int arg_idx, char* const argv[])
|
||||
{
|
||||
const std::size_t G = static_cast<size_t>(std::stol(argv[arg_idx++]));
|
||||
const std::size_t N = static_cast<size_t>(std::stol(argv[arg_idx++]));
|
||||
|
||||
@@ -46,6 +46,7 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx9")
|
||||
# CK Tile grouped-convolution profiler sources (forward, backward-weight, backward-data)
# are only added when CK_EXPERIMENTAL_BUILDER is enabled.
if(CK_EXPERIMENTAL_BUILDER)
    list(APPEND PROFILER_OPS profile_grouped_conv_fwd_tile.cpp)
    list(APPEND PROFILER_OPS profile_grouped_conv_bwd_weight_tile.cpp)
    list(APPEND PROFILER_OPS profile_grouped_conv_bwd_data_tile.cpp)
endif()
|
||||
endif()
|
||||
|
||||
@@ -275,6 +276,7 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx9")
|
||||
# CK Tile grouped-convolution instance targets (forward, backward-weight, backward-data)
# are only added when CK_EXPERIMENTAL_BUILDER is enabled.
if(CK_EXPERIMENTAL_BUILDER)
    list(APPEND DEVICE_INSTANCES device_grouped_conv_fwd_tile_instances)
    list(APPEND DEVICE_INSTANCES device_grouped_conv_bwd_weight_tile_instances)
    list(APPEND DEVICE_INSTANCES device_grouped_conv_bwd_data_tile_instances)
endif()
|
||||
endif()
|
||||
|
||||
|
||||
218
profiler/src/profile_grouped_conv_bwd_data_tile.cpp
Normal file
218
profiler/src/profile_grouped_conv_bwd_data_tile.cpp
Normal file
@@ -0,0 +1,218 @@
|
||||
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include <cstdlib>
#include <exception>
#include <initializer_list>
#include <iostream>
#include <numeric>
#include <string>
|
||||
|
||||
#include "ck_tile/builder/testing/conv/ck_tile.hpp"
|
||||
#include "ck_tile/host/device_prop.hpp"
|
||||
#include "profiler/grouped_convolution_backward_data_tile_algs.hpp"
|
||||
#include "profiler/tile_profiler_utils.hpp"
|
||||
#include "profiler/profiler_arg_utils.hpp"
|
||||
|
||||
#include "profiler_operation_registry.hpp"
|
||||
|
||||
namespace {
|
||||
|
||||
// Tensor layout combinations selectable on the command line (arg3).
// The enumerator value is the integer the user passes.
enum struct ConvLayout
{
    GNHWC_GKYXC_GNHWK = 0,
    NHWGC_GKYXC_NHWGK = 1,
    NGCHW_GKYXC_NGKHW = 2,
    NGCHW_GKCYX_NGKHW = 3,
};
|
||||
|
||||
// Data type combinations selectable on the command line (arg2).
// The enumerator value is the integer the user passes.
enum struct ConvDataType
{
    F32_F32_F32      = 0,
    F16_F16_F16      = 1,
    BF16_BF16_BF16   = 2,
    F32_F32_F32_TF32 = 3,
};
|
||||
|
||||
// Name and description under which this operation is registered with the profiler.
// Kept as macros because the help text concatenates them with adjacent string literals.
#define OP_NAME "grouped_conv_bwd_data_tile"
#define OP_DESC "Grouped Convolution Backward Data (CK Tile)"
|
||||
|
||||
static void print_helper_msg()
|
||||
{
|
||||
std::cout
|
||||
// clang-format off
|
||||
<< "arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"
|
||||
<< "arg2: data type (0: Output fp32, Weight fp32, Input fp32\n"
|
||||
<< " 1: Output fp16, Weight fp16, Input fp16\n"
|
||||
<< " 2: Output bf16, Weight bf16, Input bf16\n"
|
||||
<< " 3: Output fp32, Weight fp32, Input fp32, Compute tf32)\n"
|
||||
<< "arg3: tensor layout (0: Output[G, N, Ho, Wo, C], Weight[G, K, Y, X, C], Input[G, N, Hi, Wi, K]\n"
|
||||
<< " 1: Output[N, Ho, Wo, G, C], Weight[G, K, Y, X, C], Input[N, Hi, Wi, G, K])\n"
|
||||
<< " 2: Output[N, G, C, Ho, Wo], Weight[G, K, Y, X, C], Input[N, G, K, Hi, Wi])\n"
|
||||
<< " 3: Output[N, G, C, Ho, Wo], Weight[G, K, C, Y, X], Input[N, G, K, Hi, Wi])\n"
|
||||
<< "arg4: verification (0: no, 1: yes)\n"
|
||||
<< "arg5: initialization (0: no init, 1: integer value, 2: decimal value)\n"
|
||||
<< "arg6: print tensor value (0: no; 1: yes)\n"
|
||||
<< "arg7: time kernel (0: no, 1: yes)\n"
|
||||
<< ck::utils::conv::get_conv_param_parser_helper_msg() << std::endl
|
||||
<< "Last argument: split-K (0: internally computed split-K value; 1, 2, 4, 8, 16, 32, 64, 128: set k batches explicitly)\n"
|
||||
<< "\nOptional arguments:\n"
|
||||
<< " --instance <id> Run only the specified instance (0-indexed among valid instances)\n";
|
||||
// clang-format on
|
||||
}
|
||||
|
||||
namespace ckb = ck_tile::builder;
|
||||
namespace ckt = ck_tile::builder::test;
|
||||
namespace ckp = ck_tile::builder::profiling;
|
||||
|
||||
template <auto SIGNATURE>
|
||||
int call_profiler(const ckt::Args<SIGNATURE>& args,
|
||||
const std::string& split_k,
|
||||
bool time_kernel,
|
||||
ck_tile::index_t instance_index)
|
||||
{
|
||||
auto inputs = ckt::alloc_inputs(args);
|
||||
auto outputs = ckt::alloc_outputs(args);
|
||||
ckt::init_inputs(args, inputs.get());
|
||||
|
||||
std::cout << args.make_input_descriptor() << std::endl;
|
||||
std::cout << args.make_weight_descriptor() << std::endl;
|
||||
std::cout << args.make_output_descriptor() << std::endl;
|
||||
auto&& [valid, avg_time, op_name, best_split_k, best_instance_index] =
|
||||
ckp::run_grouped_conv_backward_data_tile_algs(
|
||||
args,
|
||||
split_k,
|
||||
instance_index,
|
||||
inputs.get(),
|
||||
outputs.get(),
|
||||
ck_tile::stream_config{nullptr,
|
||||
time_kernel,
|
||||
0 /*log_level*/,
|
||||
5 /*cold_iters*/,
|
||||
50 /*nrepeat_*/,
|
||||
true /*is_gpu_timer_*/});
|
||||
if(time_kernel)
|
||||
{
|
||||
std::cout << "\nBest configuration parameters:" << "\n\tname: " << op_name << " (instance "
|
||||
<< best_instance_index << ")" << "\n\tavg_time: " << avg_time << ", SplitK "
|
||||
<< best_split_k << std::endl;
|
||||
}
|
||||
return !valid;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int profile_grouped_conv_bwd_data_tile(int argc, char* argv[])
|
||||
{
|
||||
// Parse optional named arguments first
|
||||
ck_tile::index_t instance_index = -1;
|
||||
bool dummy;
|
||||
ck::profiler::parse_named_args(argc, argv, instance_index, dummy);
|
||||
const int named_arg_count = ck::profiler::count_named_args(argc, argv);
|
||||
|
||||
// Adjust argc for positional argument checking
|
||||
const int positional_argc = argc - named_arg_count;
|
||||
|
||||
// 8 for control, 1 for num_dim_spatial
|
||||
if(positional_argc < 9)
|
||||
{
|
||||
print_helper_msg();
|
||||
return 1;
|
||||
}
|
||||
|
||||
const auto data_type = static_cast<ConvDataType>(std::stoi(argv[2]));
|
||||
const auto layout = static_cast<ConvLayout>(std::stoi(argv[3]));
|
||||
const bool time_kernel = std::stoi(argv[7]);
|
||||
const int num_dim_spatial = std::stoi(argv[8]);
|
||||
|
||||
// 8 for control, 1 for num_dim_spatial, 4 for G/N/K/C, and 6 * num_dim_spatial, 1 for split-K
|
||||
if(positional_argc != 8 + 1 + 4 + 6 * num_dim_spatial + 1)
|
||||
{
|
||||
print_helper_msg();
|
||||
return 1;
|
||||
}
|
||||
|
||||
constexpr ck_tile::index_t conv_params_start_idx = 9;
|
||||
const auto params =
|
||||
ck::utils::conv::parse_conv_param(num_dim_spatial, conv_params_start_idx, argv);
|
||||
std::cout << params << std::endl;
|
||||
|
||||
auto split_k = std::string(argv[8 + 1 + 4 + 6 * num_dim_spatial]);
|
||||
|
||||
// The bwd data profiler in old CK uses -1 to loop over all split-K values.
|
||||
// We want to have the same API for backward compatibility, but we need to convert it to "all"
|
||||
// for the new API.
|
||||
if(split_k == "-1")
|
||||
{
|
||||
split_k = "all";
|
||||
}
|
||||
|
||||
if(layout == ConvLayout::NHWGC_GKYXC_NHWGK)
|
||||
{
|
||||
if(num_dim_spatial == 2)
|
||||
{
|
||||
if(data_type == ConvDataType::F16_F16_F16)
|
||||
{
|
||||
constexpr auto SIGNATURE = ckp::SIGNATURE_NHWGC_FP16_BWD_DATA;
|
||||
return call_profiler<SIGNATURE>(
|
||||
ckp::parse_conv_args<SIGNATURE>(conv_params_start_idx, argv),
|
||||
split_k,
|
||||
time_kernel,
|
||||
instance_index);
|
||||
}
|
||||
else if(data_type == ConvDataType::BF16_BF16_BF16)
|
||||
{
|
||||
constexpr auto SIGNATURE = ckp::SIGNATURE_NHWGC_BF16_BWD_DATA;
|
||||
return call_profiler<SIGNATURE>(
|
||||
ckp::parse_conv_args<SIGNATURE>(conv_params_start_idx, argv),
|
||||
split_k,
|
||||
time_kernel,
|
||||
instance_index);
|
||||
}
|
||||
else if(data_type == ConvDataType::F32_F32_F32)
|
||||
{
|
||||
constexpr auto SIGNATURE = ckp::SIGNATURE_NHWGC_FP32_BWD_DATA;
|
||||
return call_profiler<SIGNATURE>(
|
||||
ckp::parse_conv_args<SIGNATURE>(conv_params_start_idx, argv),
|
||||
split_k,
|
||||
time_kernel,
|
||||
instance_index);
|
||||
}
|
||||
}
|
||||
else if(num_dim_spatial == 3)
|
||||
{
|
||||
if(data_type == ConvDataType::F16_F16_F16)
|
||||
{
|
||||
constexpr auto SIGNATURE = ckp::SIGNATURE_NDHWGC_FP16_BWD_DATA;
|
||||
return call_profiler<SIGNATURE>(
|
||||
ckp::parse_conv_args<SIGNATURE>(conv_params_start_idx, argv),
|
||||
split_k,
|
||||
time_kernel,
|
||||
instance_index);
|
||||
}
|
||||
else if(data_type == ConvDataType::BF16_BF16_BF16)
|
||||
{
|
||||
constexpr auto SIGNATURE = ckp::SIGNATURE_NDHWGC_BF16_BWD_DATA;
|
||||
return call_profiler<SIGNATURE>(
|
||||
ckp::parse_conv_args<SIGNATURE>(conv_params_start_idx, argv),
|
||||
split_k,
|
||||
time_kernel,
|
||||
instance_index);
|
||||
}
|
||||
else if(data_type == ConvDataType::F32_F32_F32)
|
||||
{
|
||||
constexpr auto SIGNATURE = ckp::SIGNATURE_NDHWGC_FP32_BWD_DATA;
|
||||
return call_profiler<SIGNATURE>(
|
||||
ckp::parse_conv_args<SIGNATURE>(conv_params_start_idx, argv),
|
||||
split_k,
|
||||
time_kernel,
|
||||
instance_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "this data_type & layout is not implemented" << std::endl;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Registers the entry point above with the profiler under OP_NAME / OP_DESC.
REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_grouped_conv_bwd_data_tile);
|
||||
Reference in New Issue
Block a user