mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-19 04:19:36 +00:00
Add column to image kernel (#930)
* Add column to image kernel * Minor fixes for dtypes and client examples * Disable tests for disabled dtypes * Disable add instances functions for disabled data types * Minor stylistic fixes * Revert "Disable add instances functions for disabled data types" This reverts commit 728b869563. * Instances reduction * Add comments in device_column_to_image_impl * Update changelog and Copyrights * Improve changelog [ROCm/composable_kernel commit:e2243a4d1e]
This commit is contained in:
@@ -0,0 +1,363 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <type_traits>
|
||||
#include <sstream>
|
||||
|
||||
#include "ck/tensor_operation/gpu/device/device_base.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace host {
|
||||
|
||||
/**
|
||||
* \brief Reference implementation for column to image.
|
||||
*
|
||||
* Input tensor descriptor has [N * Do * Ho * Wo, Z * Y * X * C] data layout.
|
||||
* Memory layout is the same.
|
||||
* Output tensor descriptor has [G, N, C, Di, Hi, Wi] data layout.
|
||||
* G must be equal to 1. Memory layout is [G, N, Di, Hi, Wi, C].
|
||||
*
|
||||
* \tparam NDimSpatial Number of spatial dimensions.
|
||||
* \tparam ImageLayout Image Layout.
|
||||
* \tparam InDataType Input Data Type.
|
||||
* \tparam OutDataType Output Data Type.
|
||||
*/
|
||||
template <ck::index_t NDimSpatial,
|
||||
typename ImageLayout,
|
||||
typename InDataType,
|
||||
typename OutDataType,
|
||||
typename std::enable_if<NDimSpatial >= 1 && NDimSpatial <= 3, bool>::type = false>
|
||||
struct ReferenceColumnToImage : public device::BaseOperator
|
||||
{
|
||||
// Argument
|
||||
struct Argument : public device::BaseArgument
|
||||
{
|
||||
public:
|
||||
Argument(const Tensor<InDataType>& input,
|
||||
Tensor<OutDataType>& output,
|
||||
std::vector<ck::index_t> filter_spatial_lengths,
|
||||
std::vector<ck::index_t> conv_filter_strides,
|
||||
std::vector<ck::index_t> conv_filter_dilations,
|
||||
std::vector<ck::index_t> input_left_pads,
|
||||
std::vector<ck::index_t> input_right_pads)
|
||||
: input_{input},
|
||||
output_{output},
|
||||
conv_strides_{conv_filter_strides},
|
||||
conv_dilations_{conv_filter_dilations},
|
||||
in_left_pads_{input_left_pads},
|
||||
in_right_pads_{input_right_pads},
|
||||
filter_spatial_lengths_{filter_spatial_lengths}
|
||||
{
|
||||
initOutputSpatialLengths();
|
||||
}
|
||||
|
||||
const Tensor<InDataType>& input_;
|
||||
Tensor<OutDataType>& output_;
|
||||
|
||||
std::vector<index_t> conv_strides_;
|
||||
std::vector<index_t> conv_dilations_;
|
||||
std::vector<index_t> in_left_pads_;
|
||||
std::vector<index_t> in_right_pads_;
|
||||
|
||||
std::vector<index_t> filter_spatial_lengths_;
|
||||
std::vector<index_t> output_spatial_lengths_;
|
||||
|
||||
private:
|
||||
void initOutputSpatialLengths()
|
||||
{
|
||||
constexpr auto input_offset_to_spatial = 3;
|
||||
|
||||
for(ck::index_t i = 0; i < NDimSpatial; ++i)
|
||||
{
|
||||
// XEff = (X - 1) * conv_dilation_w + 1;
|
||||
// Wo = (Wi + in_left_pad_w + in_right_pad_w - XEff) / conv_stride_w + 1;
|
||||
const ck::index_t x_eff = (filter_spatial_lengths_[i] - 1) * conv_dilations_[i] + 1;
|
||||
|
||||
output_spatial_lengths_.push_back(
|
||||
(output_.GetLengths()[i + input_offset_to_spatial] + in_left_pads_[i] +
|
||||
in_right_pads_[i] - x_eff) /
|
||||
conv_strides_[i] +
|
||||
1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct Invoker : public device::BaseInvoker
|
||||
{
|
||||
using Argument = ReferenceColumnToImage::Argument;
|
||||
|
||||
float Run(const Argument& arg)
|
||||
{
|
||||
if(!(arg.output_.GetNumOfDimension() == NDimSpatial + 3 &&
|
||||
arg.input_.GetNumOfDimension() == 2))
|
||||
{
|
||||
throw std::runtime_error("wrong! inconsistent dimension");
|
||||
}
|
||||
|
||||
const index_t N = arg.output_.GetLengths()[1];
|
||||
const index_t C = arg.output_.GetLengths()[2];
|
||||
|
||||
if constexpr(NDimSpatial == 1)
|
||||
{
|
||||
const index_t Wo = arg.output_spatial_lengths_[0];
|
||||
auto func = [&](auto n) {
|
||||
for(index_t wo = 0; wo < Wo; ++wo)
|
||||
{
|
||||
index_t row = n * Wo + wo;
|
||||
index_t column = 0;
|
||||
|
||||
for(index_t x = 0; x < arg.filter_spatial_lengths_[0]; ++x)
|
||||
{
|
||||
auto wi = static_cast<ck::long_index_t>(wo * arg.conv_strides_[0]) +
|
||||
static_cast<ck::long_index_t>(x * arg.conv_dilations_[0]) -
|
||||
static_cast<ck::long_index_t>(arg.in_left_pads_[0]);
|
||||
|
||||
for(index_t c = 0; c < C; ++c)
|
||||
{
|
||||
if(wi >= 0 &&
|
||||
ck::type_convert<std::size_t>(wi) < arg.output_.GetLengths()[3])
|
||||
{
|
||||
float v_in = ck::type_convert<float>(arg.input_(row, column));
|
||||
float v_out = ck::type_convert<float>(arg.output_(0, n, c, wi));
|
||||
arg.output_(0, n, c, wi) =
|
||||
ck::type_convert<OutDataType>(v_in + v_out);
|
||||
}
|
||||
column++;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
make_ParallelTensorFunctor(func, N)(std::thread::hardware_concurrency());
|
||||
|
||||
return 0;
|
||||
}
|
||||
else if constexpr(NDimSpatial == 2)
|
||||
{
|
||||
const index_t Ho = arg.output_spatial_lengths_[0];
|
||||
const index_t Wo = arg.output_spatial_lengths_[1];
|
||||
|
||||
auto func = [&](auto n) {
|
||||
for(index_t ho = 0; ho < Ho; ++ho)
|
||||
{
|
||||
for(index_t wo = 0; wo < Wo; ++wo)
|
||||
{
|
||||
index_t row = n * Ho * Wo + ho * Wo + wo;
|
||||
index_t column = 0;
|
||||
|
||||
for(index_t y = 0; y < arg.filter_spatial_lengths_[0]; ++y)
|
||||
{
|
||||
auto hi =
|
||||
static_cast<ck::long_index_t>(ho * arg.conv_strides_[0]) +
|
||||
static_cast<ck::long_index_t>(y * arg.conv_dilations_[0]) -
|
||||
static_cast<ck::long_index_t>(arg.in_left_pads_[0]);
|
||||
|
||||
for(index_t x = 0; x < arg.filter_spatial_lengths_[1]; ++x)
|
||||
{
|
||||
auto wi =
|
||||
static_cast<ck::long_index_t>(wo * arg.conv_strides_[1]) +
|
||||
static_cast<ck::long_index_t>(x * arg.conv_dilations_[1]) -
|
||||
static_cast<ck::long_index_t>(arg.in_left_pads_[1]);
|
||||
|
||||
for(index_t c = 0; c < C; ++c)
|
||||
{
|
||||
|
||||
if(hi >= 0 &&
|
||||
ck::type_convert<std::size_t>(hi) <
|
||||
arg.output_.GetLengths()[3] &&
|
||||
wi >= 0 &&
|
||||
ck::type_convert<std::size_t>(wi) <
|
||||
arg.output_.GetLengths()[4])
|
||||
{
|
||||
float v_in =
|
||||
ck::type_convert<float>(arg.input_(row, column));
|
||||
float v_out = ck::type_convert<float>(
|
||||
arg.output_(0, n, c, hi, wi));
|
||||
arg.output_(0, n, c, hi, wi) =
|
||||
ck::type_convert<OutDataType>(v_in + v_out);
|
||||
}
|
||||
column++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
make_ParallelTensorFunctor(func, N)(std::thread::hardware_concurrency());
|
||||
|
||||
return 0;
|
||||
}
|
||||
else if constexpr(NDimSpatial == 3)
|
||||
{
|
||||
const index_t Do = arg.output_spatial_lengths_[0];
|
||||
const index_t Ho = arg.output_spatial_lengths_[1];
|
||||
const index_t Wo = arg.output_spatial_lengths_[2];
|
||||
|
||||
auto func = [&](auto n) {
|
||||
for(index_t d_o = 0; d_o < Do; ++d_o)
|
||||
{
|
||||
for(index_t ho = 0; ho < Ho; ++ho)
|
||||
{
|
||||
for(index_t wo = 0; wo < Wo; ++wo)
|
||||
{
|
||||
index_t row = n * Do * Ho * Wo + d_o * Ho * Wo + ho * Wo + wo;
|
||||
index_t column = 0;
|
||||
|
||||
for(index_t z = 0; z < arg.filter_spatial_lengths_[0]; ++z)
|
||||
{
|
||||
auto di =
|
||||
static_cast<ck::long_index_t>(d_o * arg.conv_strides_[0]) +
|
||||
static_cast<ck::long_index_t>(z * arg.conv_dilations_[0]) -
|
||||
static_cast<ck::long_index_t>(arg.in_left_pads_[0]);
|
||||
for(index_t y = 0; y < arg.filter_spatial_lengths_[1]; ++y)
|
||||
{
|
||||
auto hi =
|
||||
static_cast<ck::long_index_t>(ho *
|
||||
arg.conv_strides_[1]) +
|
||||
static_cast<ck::long_index_t>(y *
|
||||
arg.conv_dilations_[1]) -
|
||||
static_cast<ck::long_index_t>(arg.in_left_pads_[1]);
|
||||
for(index_t x = 0; x < arg.filter_spatial_lengths_[2]; ++x)
|
||||
{
|
||||
auto wi =
|
||||
static_cast<ck::long_index_t>(
|
||||
wo * arg.conv_strides_[2]) +
|
||||
static_cast<ck::long_index_t>(
|
||||
x * arg.conv_dilations_[2]) -
|
||||
static_cast<ck::long_index_t>(arg.in_left_pads_[2]);
|
||||
for(index_t c = 0; c < C; ++c)
|
||||
{
|
||||
if(di >= 0 &&
|
||||
ck::type_convert<std::size_t>(di) <
|
||||
arg.output_.GetLengths()[3] &&
|
||||
hi >= 0 &&
|
||||
ck::type_convert<std::size_t>(hi) <
|
||||
arg.output_.GetLengths()[4] &&
|
||||
wi >= 0 &&
|
||||
ck::type_convert<std::size_t>(wi) <
|
||||
arg.output_.GetLengths()[5])
|
||||
{
|
||||
float v_in = ck::type_convert<float>(
|
||||
arg.input_(row, column));
|
||||
float v_out = ck::type_convert<float>(
|
||||
arg.output_(0, n, c, di, hi, wi));
|
||||
arg.output_(0, n, c, di, hi, wi) =
|
||||
ck::type_convert<OutDataType>(v_in + v_out);
|
||||
}
|
||||
column++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
make_ParallelTensorFunctor(func, N)(std::thread::hardware_concurrency());
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Type-erased entry point: forwards to Run(const Argument&).
 *
 * \param p_arg Must point to a ReferenceColumnToImage::Argument.
 * \return The value returned by Run(const Argument&).
 * \throws std::runtime_error if p_arg is null or is not an Argument of this
 *         operation (previously this dereferenced a null pointer).
 */
float Run(const device::BaseArgument* p_arg,
          const StreamConfig& /*stream_config*/ = StreamConfig{}) override
{
    const auto* p_typed_arg = dynamic_cast<const Argument*>(p_arg);

    if(p_typed_arg == nullptr)
    {
        throw std::runtime_error("wrong! argument is not a ReferenceColumnToImage::Argument");
    }

    return Run(*p_typed_arg);
}
|
||||
};
|
||||
|
||||
static constexpr bool IsValidCompilationParameter()
|
||||
{
|
||||
using namespace tensor_layout::convolution;
|
||||
|
||||
if constexpr(!(std::is_same_v<ImageLayout, GNWC> || std::is_same_v<ImageLayout, GNHWC> ||
|
||||
std::is_same_v<ImageLayout, GNDHWC>))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if constexpr(!(NDimSpatial >= 1 && NDimSpatial <= 3))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool IsSupportedArgument(const Argument& arg)
|
||||
{
|
||||
const ck::index_t G = arg.output_.GetLengths()[0];
|
||||
const ck::index_t N = arg.output_.GetLengths()[1];
|
||||
const ck::index_t C = arg.output_.GetLengths()[2];
|
||||
|
||||
const index_t NDoHoWo =
|
||||
N * ck::accumulate_n<index_t>(
|
||||
arg.output_spatial_lengths_.begin(), NDimSpatial, 1, std::multiplies<>());
|
||||
const index_t CZYX =
|
||||
C * ck::accumulate_n<index_t>(
|
||||
arg.filter_spatial_lengths_.begin(), NDimSpatial, 1, std::multiplies<>());
|
||||
|
||||
if(!(arg.input_.GetLengths()[0] == static_cast<std::size_t>(NDoHoWo) &&
|
||||
arg.input_.GetLengths()[1] == static_cast<std::size_t>(CZYX)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if(G != 1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * \brief Type-erased overload of IsSupportedArgument.
 *
 * \param p_arg Candidate argument.
 * \return false when p_arg is null or is not a ReferenceColumnToImage::Argument
 *         (previously a null dereference); otherwise the result of
 *         IsSupportedArgument(const Argument&).
 */
bool IsSupportedArgument(const device::BaseArgument* p_arg) override
{
    const auto* p_typed_arg = dynamic_cast<const Argument*>(p_arg);

    if(p_typed_arg == nullptr)
    {
        return false;
    }

    return IsSupportedArgument(*p_typed_arg);
}
|
||||
|
||||
static auto MakeArgument(const Tensor<InDataType>& input,
|
||||
Tensor<OutDataType>& output,
|
||||
std::vector<ck::index_t> filter_spatial_lengths,
|
||||
std::vector<ck::index_t> conv_filter_strides,
|
||||
std::vector<ck::index_t> conv_filter_dilations,
|
||||
std::vector<ck::index_t> input_left_pads,
|
||||
std::vector<ck::index_t> input_right_pads)
|
||||
{
|
||||
return Argument{input,
|
||||
output,
|
||||
filter_spatial_lengths,
|
||||
conv_filter_strides,
|
||||
conv_filter_dilations,
|
||||
input_left_pads,
|
||||
input_right_pads};
|
||||
}
|
||||
|
||||
/// Returns a default-constructed host invoker by value.
static auto MakeInvoker()
{
    return Invoker();
}
|
||||
|
||||
virtual std::unique_ptr<device::BaseInvoker> MakeInvokerPointer()
|
||||
{
|
||||
return std::make_unique<Invoker>(Invoker{});
|
||||
}
|
||||
|
||||
std::string GetTypeString() const override
|
||||
{
|
||||
auto str = std::stringstream();
|
||||
|
||||
// clang-format off
|
||||
str << "ReferenceColumnToImage"
|
||||
<< std::endl;
|
||||
// clang-format on
|
||||
|
||||
return str.str();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace host
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -18,16 +18,18 @@ namespace host {
|
||||
/**
|
||||
* \brief Reference implementation for image to column.
|
||||
*
|
||||
* Tensor descriptor has [G, N, C, Di, Hi, Wi] data layout.
|
||||
* Input tensor descriptor has [G, N, C, Di, Hi, Wi] data layout.
|
||||
* G must be equal to 1. Memory layout is [G, N, Di, Hi, Wi, C].
|
||||
* Output tensor descriptor has [N * Do * Ho * Wo, Z * Y * X * C] data layout.
|
||||
* Memory layout is the same.
|
||||
*
|
||||
* \tparam NDimSpatial Number of spatial dimensions.
|
||||
* \tparam InputLayout Input Layout.
|
||||
* \tparam ImageLayout Image Layout.
|
||||
* \tparam InDataType Input Data Type.
|
||||
* \tparam OutDataType Output Data Type.
|
||||
*/
|
||||
template <ck::index_t NDimSpatial,
|
||||
typename InputLayout,
|
||||
typename ImageLayout,
|
||||
typename InDataType,
|
||||
typename OutDataType,
|
||||
typename std::enable_if<NDimSpatial >= 1 && NDimSpatial <= 3, bool>::type = false>
|
||||
@@ -240,8 +242,8 @@ struct ReferenceImageToColumn : public device::BaseOperator
|
||||
{
|
||||
using namespace tensor_layout::convolution;
|
||||
|
||||
if constexpr(!(std::is_same_v<InputLayout, GNWC> || std::is_same_v<InputLayout, GNHWC> ||
|
||||
std::is_same_v<InputLayout, GNDHWC>))
|
||||
if constexpr(!(std::is_same_v<ImageLayout, GNWC> || std::is_same_v<ImageLayout, GNHWC> ||
|
||||
std::is_same_v<ImageLayout, GNDHWC>))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,282 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/device_conv_tensor_rearrange.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/conv_tensor_rearrange_op.hpp"
|
||||
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
using namespace ck::conv_tensor_rearrange_op;
|
||||
|
||||
// Image to Column
|
||||
// nwc, 1d
|
||||
void add_device_image_to_column_nwc_1d_bf16_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, BF16, BF16, ImageToColumn>>>&
|
||||
instances);
|
||||
|
||||
void add_device_image_to_column_nwc_1d_f16_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F16, F16, ImageToColumn>>>&
|
||||
instances);
|
||||
|
||||
void add_device_image_to_column_nwc_1d_f32_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F32, F32, ImageToColumn>>>&
|
||||
instances);
|
||||
|
||||
void add_device_image_to_column_nwc_1d_i8_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, int8_t, int8_t, ImageToColumn>>>&
|
||||
instances);
|
||||
// nhwc, 2d
|
||||
void add_device_image_to_column_nhwc_2d_bf16_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, BF16, BF16, ImageToColumn>>>&
|
||||
instances);
|
||||
|
||||
void add_device_image_to_column_nhwc_2d_f16_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, F16, F16, ImageToColumn>>>&
|
||||
instances);
|
||||
|
||||
void add_device_image_to_column_nhwc_2d_f32_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, F32, F32, ImageToColumn>>>&
|
||||
instances);
|
||||
|
||||
void add_device_image_to_column_nhwc_2d_i8_instances(
|
||||
std::vector<
|
||||
std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, int8_t, int8_t, ImageToColumn>>>&
|
||||
instances);
|
||||
// ndhwc, 3d
|
||||
void add_device_image_to_column_ndhwc_3d_bf16_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, BF16, BF16, ImageToColumn>>>&
|
||||
instances);
|
||||
|
||||
void add_device_image_to_column_ndhwc_3d_f16_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F16, F16, ImageToColumn>>>&
|
||||
instances);
|
||||
|
||||
void add_device_image_to_column_ndhwc_3d_f32_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F32, F32, ImageToColumn>>>&
|
||||
instances);
|
||||
|
||||
void add_device_image_to_column_ndhwc_3d_i8_instances(
|
||||
std::vector<
|
||||
std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, int8_t, int8_t, ImageToColumn>>>&
|
||||
instances);
|
||||
|
||||
// Column to Image
|
||||
// nwc, 1d
|
||||
void add_device_column_to_image_nwc_1d_bf16_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, BF16, BF16, ColumnToImage>>>&
|
||||
instances);
|
||||
|
||||
void add_device_column_to_image_nwc_1d_f16_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F16, F16, ColumnToImage>>>&
|
||||
instances);
|
||||
|
||||
void add_device_column_to_image_nwc_1d_f32_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F32, F32, ColumnToImage>>>&
|
||||
instances);
|
||||
|
||||
void add_device_column_to_image_nwc_1d_i8_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, int8_t, int8_t, ColumnToImage>>>&
|
||||
instances);
|
||||
// nhwc, 2d
|
||||
void add_device_column_to_image_nhwc_2d_bf16_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, BF16, BF16, ColumnToImage>>>&
|
||||
instances);
|
||||
|
||||
void add_device_column_to_image_nhwc_2d_f16_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, F16, F16, ColumnToImage>>>&
|
||||
instances);
|
||||
|
||||
void add_device_column_to_image_nhwc_2d_f32_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, F32, F32, ColumnToImage>>>&
|
||||
instances);
|
||||
|
||||
void add_device_column_to_image_nhwc_2d_i8_instances(
|
||||
std::vector<
|
||||
std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, int8_t, int8_t, ColumnToImage>>>&
|
||||
instances);
|
||||
// ndhwc, 3d
|
||||
void add_device_column_to_image_ndhwc_3d_bf16_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, BF16, BF16, ColumnToImage>>>&
|
||||
instances);
|
||||
|
||||
void add_device_column_to_image_ndhwc_3d_f16_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F16, F16, ColumnToImage>>>&
|
||||
instances);
|
||||
|
||||
void add_device_column_to_image_ndhwc_3d_f32_instances(
|
||||
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F32, F32, ColumnToImage>>>&
|
||||
instances);
|
||||
|
||||
void add_device_column_to_image_ndhwc_3d_i8_instances(
|
||||
std::vector<
|
||||
std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, int8_t, int8_t, ColumnToImage>>>&
|
||||
instances);
|
||||
|
||||
template <ck::index_t NumDimSpatial,
|
||||
typename ImageLayout,
|
||||
typename InDataType,
|
||||
typename OutDataType,
|
||||
typename ConvTensorRearrangeOp>
|
||||
struct DeviceOperationInstanceFactory<
|
||||
ck::tensor_operation::device::DeviceConvTensorRearrange<NumDimSpatial,
|
||||
ImageLayout,
|
||||
InDataType,
|
||||
OutDataType,
|
||||
ConvTensorRearrangeOp>>
|
||||
{
|
||||
using DeviceOp = DeviceConvTensorRearrange<NumDimSpatial,
|
||||
ImageLayout,
|
||||
InDataType,
|
||||
OutDataType,
|
||||
ConvTensorRearrangeOp>;
|
||||
|
||||
static auto GetInstances()
|
||||
{
|
||||
std::vector<std::unique_ptr<DeviceOp>> op_ptrs;
|
||||
|
||||
if constexpr(is_same_v<ConvTensorRearrangeOp, ImageToColumn>)
|
||||
{
|
||||
if constexpr(NumDimSpatial == 1 && is_same_v<ImageLayout, GNWC>)
|
||||
{
|
||||
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
|
||||
{
|
||||
add_device_image_to_column_nwc_1d_f32_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
|
||||
{
|
||||
add_device_image_to_column_nwc_1d_f16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
|
||||
is_same_v<OutDataType, ck::bhalf_t>)
|
||||
{
|
||||
add_device_image_to_column_nwc_1d_bf16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
|
||||
{
|
||||
add_device_image_to_column_nwc_1d_i8_instances(op_ptrs);
|
||||
}
|
||||
}
|
||||
else if constexpr(NumDimSpatial == 2 && is_same_v<ImageLayout, GNHWC>)
|
||||
{
|
||||
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
|
||||
{
|
||||
add_device_image_to_column_nhwc_2d_f32_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
|
||||
{
|
||||
add_device_image_to_column_nhwc_2d_f16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
|
||||
is_same_v<OutDataType, ck::bhalf_t>)
|
||||
{
|
||||
add_device_image_to_column_nhwc_2d_bf16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
|
||||
{
|
||||
add_device_image_to_column_nhwc_2d_i8_instances(op_ptrs);
|
||||
}
|
||||
}
|
||||
else if constexpr(NumDimSpatial == 3 && is_same_v<ImageLayout, GNDHWC>)
|
||||
{
|
||||
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
|
||||
{
|
||||
add_device_image_to_column_ndhwc_3d_f32_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
|
||||
{
|
||||
add_device_image_to_column_ndhwc_3d_f16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
|
||||
is_same_v<OutDataType, ck::bhalf_t>)
|
||||
{
|
||||
add_device_image_to_column_ndhwc_3d_bf16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
|
||||
{
|
||||
add_device_image_to_column_ndhwc_3d_i8_instances(op_ptrs);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if constexpr(is_same_v<ConvTensorRearrangeOp, ColumnToImage>)
|
||||
{
|
||||
if constexpr(NumDimSpatial == 1 && is_same_v<ImageLayout, GNWC>)
|
||||
{
|
||||
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
|
||||
{
|
||||
add_device_column_to_image_nwc_1d_f32_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
|
||||
{
|
||||
add_device_column_to_image_nwc_1d_f16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
|
||||
is_same_v<OutDataType, ck::bhalf_t>)
|
||||
{
|
||||
add_device_column_to_image_nwc_1d_bf16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
|
||||
{
|
||||
add_device_column_to_image_nwc_1d_i8_instances(op_ptrs);
|
||||
}
|
||||
}
|
||||
else if constexpr(NumDimSpatial == 2 && is_same_v<ImageLayout, GNHWC>)
|
||||
{
|
||||
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
|
||||
{
|
||||
add_device_column_to_image_nhwc_2d_f32_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
|
||||
{
|
||||
add_device_column_to_image_nhwc_2d_f16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
|
||||
is_same_v<OutDataType, ck::bhalf_t>)
|
||||
{
|
||||
add_device_column_to_image_nhwc_2d_bf16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
|
||||
{
|
||||
add_device_column_to_image_nhwc_2d_i8_instances(op_ptrs);
|
||||
}
|
||||
}
|
||||
else if constexpr(NumDimSpatial == 3 && is_same_v<ImageLayout, GNDHWC>)
|
||||
{
|
||||
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
|
||||
{
|
||||
add_device_column_to_image_ndhwc_3d_f32_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
|
||||
{
|
||||
add_device_column_to_image_ndhwc_3d_f16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
|
||||
is_same_v<OutDataType, ck::bhalf_t>)
|
||||
{
|
||||
add_device_column_to_image_ndhwc_3d_bf16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
|
||||
{
|
||||
add_device_column_to_image_ndhwc_3d_i8_instances(op_ptrs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return op_ptrs;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -0,0 +1,106 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp"
|
||||
|
||||
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
using namespace ck::tensor_layout::convolution;
|
||||
using namespace ck::conv_tensor_rearrange_op;
|
||||
|
||||
using BF16 = ck::bhalf_t;
|
||||
using F16 = ck::half_t;
|
||||
using F32 = float;
|
||||
|
||||
template <ck::index_t... Is>
|
||||
using S = ck::Sequence<Is...>;
|
||||
|
||||
template <ck::index_t NDimSpatial, typename InLayout>
|
||||
using device_column_to_image_bf16_instances = std::tuple<
|
||||
// clang-format off
|
||||
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
|
||||
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
|
||||
//#####################| Spatial| | | | | | | Lengths| Vector|
|
||||
//#####################| | | | | | | | | |
|
||||
// generic instance
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 16, 16, S<8, 8>, 1>,
|
||||
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 32, 32, S<8, 8>, 4>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 64, 64, 64, S<8, 8>, 8>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 32, 64, S<8, 16>, 4>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 128, S<8, 16>, 8>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 4>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 4>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 8>
|
||||
// clang-format on
|
||||
>;
|
||||
|
||||
template <ck::index_t NDimSpatial, typename InLayout>
|
||||
using device_column_to_image_f16_instances = std::tuple<
|
||||
// clang-format off
|
||||
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
|
||||
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
|
||||
//#####################| Spatial| | | | | | | Lengths| Vector|
|
||||
//#####################| | | | | | | | | |
|
||||
// generic instance
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 16, 16, S<8, 8>, 1>,
|
||||
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 32, 32, S<8, 8>, 4>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 64, 64, 64, S<8, 8>, 8>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 32, 64, S<8, 16>, 4>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 128, S<8, 16>, 8>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 4>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 4>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 8>
|
||||
// clang-format on
|
||||
>;
|
||||
|
||||
template <ck::index_t NDimSpatial, typename InLayout>
|
||||
using device_column_to_image_f32_instances = std::tuple<
|
||||
// clang-format off
|
||||
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
|
||||
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
|
||||
//#####################| Spatial| | | | | | | Lengths| Vector|
|
||||
//#####################| | | | | | | | | |
|
||||
// generic instance
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 64, 16, 16, S<8, 8>, 1>,
|
||||
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 64, 32, 32, S<8, 8>, 4>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 128, 32, 64, S<8, 16>, 4>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 4>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 4>
|
||||
// clang-format on
|
||||
>;
|
||||
|
||||
template <ck::index_t NDimSpatial, typename InLayout>
|
||||
using device_column_to_image_i8_instances = std::tuple<
|
||||
// clang-format off
|
||||
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
|
||||
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
|
||||
//#####################| Spatial| | | | | | | Lengths| Vector|
|
||||
//#####################| | | | | | | | | |
|
||||
// generic instance
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 16, 16, S<8, 8>, 1>,
|
||||
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 32, 32, S<8, 8>, 4>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 64, 64, S<8, 8>, 8>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 32, 64, S<8, 16>, 4>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 128, S<8, 16>, 8>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 4>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 4>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 8>,
|
||||
DeviceColumnToImageImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 256, 256, S<16, 16>, 16>
|
||||
// clang-format on
|
||||
>;
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
@@ -13,6 +13,7 @@ namespace device {
|
||||
namespace instance {
|
||||
|
||||
using namespace ck::tensor_layout::convolution;
|
||||
using namespace ck::conv_tensor_rearrange_op;
|
||||
|
||||
using BF16 = ck::bhalf_t;
|
||||
using F16 = ck::half_t;
|
||||
@@ -28,17 +29,12 @@ using device_image_to_column_bf16_instances = std::tuple<
|
||||
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
|
||||
//#####################| Spatial| | | | | | | Lengths| Vector|
|
||||
//#####################| | | | | | | | | |
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 8, 8, S<8, 8>, 1>,
|
||||
// generic instance
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 16, 16, S<8, 8>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 32, 32, S<8, 8>, 4>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 64, 64, 64, S<8, 8>, 8>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 16, 16, S<8, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 64, S<8, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 32, 64, S<8, 16>, 4>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 128, 64, 128, S<8, 16>, 8>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 16, 16, S<16, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 64, 64, S<16, 16>, 4>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 4>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, BF16, BF16, 256, 128, 128, S<16, 16>, 8>
|
||||
@@ -52,17 +48,13 @@ using device_image_to_column_f16_instances = std::tuple<
|
||||
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
|
||||
//#####################| Spatial| | | | | | | Lengths| Vector|
|
||||
//#####################| | | | | | | | | |
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 8, 8, S<8, 8>, 1>,
|
||||
// generic instance
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 16, 16, S<8, 8>, 1>,
|
||||
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 32, 32, S<8, 8>, 4>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 64, 64, 64, S<8, 8>, 8>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 16, 16, S<8, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 64, S<8, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 32, 64, S<8, 16>, 4>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 128, 64, 128, S<8, 16>, 8>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 16, 16, S<16, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 64, 64, S<16, 16>, 4>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 4>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F16, F16, 256, 128, 128, S<16, 16>, 8>
|
||||
@@ -76,15 +68,11 @@ using device_image_to_column_f32_instances = std::tuple<
|
||||
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
|
||||
//#####################| Spatial| | | | | | | Lengths| Vector|
|
||||
//#####################| | | | | | | | | |
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 64, 8, 8, S<8, 8>, 1>,
|
||||
// generic instance
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 64, 16, 16, S<8, 8>, 1>,
|
||||
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 64, 32, 32, S<8, 8>, 4>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 128, 16, 16, S<8, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 128, 64, 64, S<8, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 128, 32, 64, S<8, 16>, 4>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 16, 16, S<16, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 64, 64, S<16, 16>, 4>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, F32, F32, 256, 128, 128, S<16, 16>, 4>
|
||||
// clang-format on
|
||||
@@ -97,17 +85,13 @@ using device_image_to_column_i8_instances = std::tuple<
|
||||
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
|
||||
//#####################| Spatial| | | | | | | Lengths| Vector|
|
||||
//#####################| | | | | | | | | |
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 8, 8, S<8, 8>, 1>,
|
||||
// generic instance
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 16, 16, S<8, 8>, 1>,
|
||||
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 32, 32, S<8, 8>, 4>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 64, 64, 64, S<8, 8>, 8>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 16, 16, S<8, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 64, S<8, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 32, 64, S<8, 16>, 4>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 128, 64, 128, S<8, 16>, 8>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 16, 16, S<16, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 1>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 64, 64, S<16, 16>, 4>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 4>,
|
||||
DeviceImageToColumnImpl<NDimSpatial, InLayout, int8_t, int8_t, 256, 128, 128, S<16, 16>, 8>,
|
||||
@@ -1,135 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/device_image_to_column.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
|
||||
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
// nhwc, 1d
|
||||
void add_device_image_to_column_nhwc_1d_bf16_instances(
|
||||
std::vector<std::unique_ptr<DeviceImageToColumn<1, GNWC, BF16, BF16>>>& instances);
|
||||
|
||||
void add_device_image_to_column_nhwc_1d_f16_instances(
|
||||
std::vector<std::unique_ptr<DeviceImageToColumn<1, GNWC, F16, F16>>>& instances);
|
||||
|
||||
void add_device_image_to_column_nhwc_1d_f32_instances(
|
||||
std::vector<std::unique_ptr<DeviceImageToColumn<1, GNWC, F32, F32>>>& instances);
|
||||
|
||||
void add_device_image_to_column_nhwc_1d_i8_instances(
|
||||
std::vector<std::unique_ptr<DeviceImageToColumn<1, GNWC, int8_t, int8_t>>>& instances);
|
||||
// nhwc, 2d
|
||||
void add_device_image_to_column_nhwc_2d_bf16_instances(
|
||||
std::vector<std::unique_ptr<DeviceImageToColumn<2, GNHWC, BF16, BF16>>>& instances);
|
||||
|
||||
void add_device_image_to_column_nhwc_2d_f16_instances(
|
||||
std::vector<std::unique_ptr<DeviceImageToColumn<2, GNHWC, F16, F16>>>& instances);
|
||||
|
||||
void add_device_image_to_column_nhwc_2d_f32_instances(
|
||||
std::vector<std::unique_ptr<DeviceImageToColumn<2, GNHWC, F32, F32>>>& instances);
|
||||
|
||||
void add_device_image_to_column_nhwc_2d_i8_instances(
|
||||
std::vector<std::unique_ptr<DeviceImageToColumn<2, GNHWC, int8_t, int8_t>>>& instances);
|
||||
// nhwc, 3d
|
||||
void add_device_image_to_column_nhwc_3d_bf16_instances(
|
||||
std::vector<std::unique_ptr<DeviceImageToColumn<3, GNDHWC, BF16, BF16>>>& instances);
|
||||
|
||||
void add_device_image_to_column_nhwc_3d_f16_instances(
|
||||
std::vector<std::unique_ptr<DeviceImageToColumn<3, GNDHWC, F16, F16>>>& instances);
|
||||
|
||||
void add_device_image_to_column_nhwc_3d_f32_instances(
|
||||
std::vector<std::unique_ptr<DeviceImageToColumn<3, GNDHWC, F32, F32>>>& instances);
|
||||
|
||||
void add_device_image_to_column_nhwc_3d_i8_instances(
|
||||
std::vector<std::unique_ptr<DeviceImageToColumn<3, GNDHWC, int8_t, int8_t>>>& instances);
|
||||
|
||||
template <ck::index_t NumDimSpatial, typename InLayout, typename InDataType, typename OutDataType>
|
||||
struct DeviceOperationInstanceFactory<
|
||||
ck::tensor_operation::device::
|
||||
DeviceImageToColumn<NumDimSpatial, InLayout, InDataType, OutDataType>>
|
||||
{
|
||||
using DeviceOp = DeviceImageToColumn<NumDimSpatial, InLayout, InDataType, OutDataType>;
|
||||
|
||||
static auto GetInstances()
|
||||
{
|
||||
std::vector<std::unique_ptr<DeviceOp>> op_ptrs;
|
||||
|
||||
if constexpr(NumDimSpatial == 1 && is_same_v<InLayout, GNWC>)
|
||||
{
|
||||
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
|
||||
{
|
||||
add_device_image_to_column_nhwc_1d_f32_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
|
||||
{
|
||||
add_device_image_to_column_nhwc_1d_f16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
|
||||
is_same_v<OutDataType, ck::bhalf_t>)
|
||||
{
|
||||
add_device_image_to_column_nhwc_1d_bf16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
|
||||
{
|
||||
add_device_image_to_column_nhwc_1d_i8_instances(op_ptrs);
|
||||
}
|
||||
}
|
||||
else if constexpr(NumDimSpatial == 2 && is_same_v<InLayout, GNHWC>)
|
||||
{
|
||||
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
|
||||
{
|
||||
add_device_image_to_column_nhwc_2d_f32_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
|
||||
{
|
||||
add_device_image_to_column_nhwc_2d_f16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
|
||||
is_same_v<OutDataType, ck::bhalf_t>)
|
||||
{
|
||||
add_device_image_to_column_nhwc_2d_bf16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
|
||||
{
|
||||
add_device_image_to_column_nhwc_2d_i8_instances(op_ptrs);
|
||||
}
|
||||
}
|
||||
else if constexpr(NumDimSpatial == 3 && is_same_v<InLayout, GNDHWC>)
|
||||
{
|
||||
if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
|
||||
{
|
||||
add_device_image_to_column_nhwc_3d_f32_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
|
||||
{
|
||||
add_device_image_to_column_nhwc_3d_f16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
|
||||
is_same_v<OutDataType, ck::bhalf_t>)
|
||||
{
|
||||
add_device_image_to_column_nhwc_3d_bf16_instances(op_ptrs);
|
||||
}
|
||||
else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
|
||||
{
|
||||
add_device_image_to_column_nhwc_3d_i8_instances(op_ptrs);
|
||||
}
|
||||
}
|
||||
|
||||
return op_ptrs;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
Reference in New Issue
Block a user