mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-15 18:42:06 +00:00
Add column to image kernel (#930)
* Add column to image kernel * Minor fixes for dtypes and client examples * Disable tests for disabled dtypes * Disable add instances functions for disabled data types * Minor stylistic fixes * Revert "Disable add instances functions for disabled data types" This reverts commit 728b869563. * Instances reduction * Add comments in device_column_to_image_impl * Update changelog and Copyrights * Improve changelog [ROCm/composable_kernel commit:e2243a4d1e]
This commit is contained in:
@@ -2,9 +2,11 @@ list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
|
||||
set(target 0)
|
||||
foreach(gpu IN LISTS GPU_TARGETS)
|
||||
if(gpu IN_LIST gpu_list AND target EQUAL 0)
|
||||
add_custom_target(example_image_to_column)
|
||||
add_custom_target(example_im2col_col2im)
|
||||
add_example_executable(example_image_to_column_f32 image_to_column_f32.cpp)
|
||||
add_dependencies(example_image_to_column example_image_to_column_f32)
|
||||
add_dependencies(example_im2col_col2im example_image_to_column_f32)
|
||||
add_example_executable(example_column_to_image_f32 column_to_image_f32.cpp)
|
||||
add_dependencies(example_im2col_col2im example_column_to_image_f32)
|
||||
set(target 1)
|
||||
endif()
|
||||
endforeach()
|
||||
165
example/52_im2col_col2im/column_to_image_f32.cpp
Normal file
165
example/52_im2col_col2im/column_to_image_f32.cpp
Normal file
@@ -0,0 +1,165 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
using InDataType = FP32; // ck::bhalf_t;//FP32;
|
||||
using OutDataType = FP32; // ck::bhalf_t;//FP32;
|
||||
|
||||
using ImLayout = ck::tensor_layout::convolution::GNHWC;
|
||||
using ColumnToImageOp = ck::conv_tensor_rearrange_op::ColumnToImage;
|
||||
|
||||
// clang-format off
|
||||
using DeviceColToImgInstance = ck::tensor_operation::device::DeviceColumnToImageImpl
|
||||
//#####################| Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
|
||||
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
|
||||
//#####################| Spatial| | | | | | | Lengths| Vector|
|
||||
//#####################| | | | | | | | | |
|
||||
< NDimSpatial, ImLayout, InDataType, OutDataType, 256, 128, 128, S<16, 16>, 1>;
|
||||
// clang-format on
|
||||
|
||||
/// Runs the column-to-image (col2img) device operation for one convolution
/// problem and optionally checks it against the host reference.
///
/// The input is a 2D "column" tensor of shape
/// {N * prod(output spatial lengths), C * prod(filter spatial lengths)};
/// the output is the image tensor described by
/// make_input_host_tensor_descriptor_g_n_c_wis_packed<ImLayout>(conv_params).
///
/// @param config       Selects verification, input initialization method and
///                     whether the kernel is timed.
/// @param conv_params  Convolution problem description. The caller is expected
///                     to have checked that it has NDimSpatial spatial
///                     dimensions; the fixed-size arrays below are filled from
///                     it without a bounds check.
/// @return false if the device op or the reference op rejects the argument;
///         the result of ck::utils::check_err when verification is enabled;
///         true otherwise.
bool RunColumnToImage(const ExecutionConfig& config, const ck::utils::conv::ConvParam& conv_params)
{
    const auto N = conv_params.N_;
    const auto C = conv_params.C_;

    // Extents of the 2D column tensor: rows = N * Do*Ho*Wo, columns = C * Z*Y*X.
    const ck::index_t NDoHoWo =
        N * ck::accumulate_n<ck::index_t>(
                conv_params.output_spatial_lengths_.begin(), NDimSpatial, 1, std::multiplies<>());
    const ck::index_t CZYX =
        C * ck::accumulate_n<ck::index_t>(
                conv_params.filter_spatial_lengths_.begin(), NDimSpatial, 1, std::multiplies<>());

    // For col2img the column tensor is the input and the image is the output.
    const auto in_desc = HostTensorDescriptor({NDoHoWo, CZYX});
    const auto out_desc =
        ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<ImLayout>(conv_params);

    // Fixed-size copies of the problem description, as taken by MakeArgument().
    std::array<ck::index_t, NDimSpatial> input_spatial_lengths{};
    std::array<ck::index_t, NDimSpatial> filter_spatial_lengths{};
    std::array<ck::index_t, NDimSpatial> output_spatial_lengths{};
    std::array<ck::index_t, NDimSpatial + 3> image_g_n_c_wis_strides{};
    std::array<ck::index_t, 2> gemm_m_k_strides{};
    std::array<ck::index_t, NDimSpatial> conv_filter_strides{};
    std::array<ck::index_t, NDimSpatial> conv_filter_dilations{};
    std::array<ck::index_t, NDimSpatial> input_left_pads{};
    std::array<ck::index_t, NDimSpatial> input_right_pads{};

    // Copies every element of x into y; y must be at least as long as x.
    auto copy = [](const auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); };

    copy(conv_params.input_spatial_lengths_, input_spatial_lengths);
    copy(conv_params.filter_spatial_lengths_, filter_spatial_lengths);
    copy(conv_params.output_spatial_lengths_, output_spatial_lengths);
    copy(in_desc.GetStrides(), gemm_m_k_strides);
    copy(out_desc.GetStrides(), image_g_n_c_wis_strides);
    copy(conv_params.conv_filter_strides_, conv_filter_strides);
    copy(conv_params.conv_filter_dilations_, conv_filter_dilations);
    copy(conv_params.input_left_pads_, input_left_pads);
    copy(conv_params.input_right_pads_, input_right_pads);

    Tensor<InDataType> in(in_desc);
    Tensor<OutDataType> out_device(out_desc);
    Tensor<OutDataType> out_host(out_desc);

    std::cout << "in: " << in.mDesc << std::endl;
    std::cout << "out: " << out_device.mDesc << std::endl;

    // Input initialization: 0 leaves the tensor as constructed, 1 and any other
    // value select different generators (see host_tensor_generator.hpp).
    switch(config.init_method)
    {
    case 0: break;
    case 1: in.GenerateTensorValue(GeneratorTensor_2<InDataType>{1, 2}); break;
    default: in.GenerateTensorValue(GeneratorTensor_3<InDataType>{-0.5, 0.5});
    }

    DeviceMem in_device_buf(sizeof(InDataType) * in.mDesc.GetElementSpaceSize());
    DeviceMem out_device_buf(sizeof(OutDataType) * out_device.mDesc.GetElementSpaceSize());

    in_device_buf.ToDevice(in.mData.data());

    // Reset the output image buffer to zero before the kernel runs.
    out_device_buf.SetZero();

    static_assert(std::is_default_constructible_v<DeviceColToImgInstance>);

    // Build the col2img device op, its invoker and its argument.
    auto col2img  = DeviceColToImgInstance{};
    auto invoker  = col2img.MakeInvoker();
    auto argument = col2img.MakeArgument(in_device_buf.GetDeviceBuffer(),
                                         out_device_buf.GetDeviceBuffer(),
                                         N,
                                         C,
                                         input_spatial_lengths,
                                         filter_spatial_lengths,
                                         output_spatial_lengths,
                                         image_g_n_c_wis_strides,
                                         gemm_m_k_strides,
                                         conv_filter_strides,
                                         conv_filter_dilations,
                                         input_left_pads,
                                         input_right_pads);

    if(!col2img.IsSupportedArgument(argument))
    {
        std::cerr << "wrong! device_col2img with the specified compilation parameters does "
                     "not support this col2img problem"
                  << std::endl;

        return false;
    }

    float ave_time = invoker.Run(argument, StreamConfig{nullptr, config.time_kernel});

    // Widen to std::size_t before multiplying: NDoHoWo * CZYX evaluated in
    // ck::index_t could overflow for large problems before the sizeof factor
    // promotes the expression.
    const std::size_t num_btype = static_cast<std::size_t>(NDoHoWo) *
                                  static_cast<std::size_t>(CZYX) *
                                  (sizeof(OutDataType) + sizeof(InDataType));

    // bytes / 1e6 / milliseconds == GB/s
    float gb_per_sec = num_btype / 1.E6 / ave_time;
    std::cout << "Perf: " << ave_time << " ms, " << gb_per_sec << " GB/s" << std::endl;

    if(config.do_verification)
    {
        auto ref_column_to_image = ck::tensor_operation::host::
            ReferenceColumnToImage<NDimSpatial, ImLayout, InDataType, OutDataType>();

        auto ref_invoker = ref_column_to_image.MakeInvoker();

        auto ref_argument = ref_column_to_image.MakeArgument(in,
                                                             out_host,
                                                             conv_params.filter_spatial_lengths_,
                                                             conv_params.conv_filter_strides_,
                                                             conv_params.conv_filter_dilations_,
                                                             conv_params.input_left_pads_,
                                                             conv_params.input_right_pads_);

        if(!ref_column_to_image.IsSupportedArgument(&ref_argument))
        {
            std::cerr << "wrong! ref_col2img with the specified compilation parameters does "
                         "not support this col2img problem"
                      << std::endl;
            return false;
        }

        // Compute the host reference, fetch the device result and compare.
        ref_invoker.Run(ref_argument);
        out_device_buf.FromDevice(out_device.mData.data());
        return ck::utils::check_err(out_device.mData, out_host.mData);
    }

    return true;
}
|
||||
|
||||
int RunColumnToImageExample(int argc, char* argv[])
|
||||
{
|
||||
ExecutionConfig config;
|
||||
ck::utils::conv::ConvParam conv_params = DefaultConvParams;
|
||||
|
||||
if(!parse_cmd_args(argc, argv, config, conv_params))
|
||||
{
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if(conv_params.num_dim_spatial_ != NDimSpatial)
|
||||
{
|
||||
std::cerr << "unsupported # of spatial dimensions" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
return !RunColumnToImage(config, conv_params);
|
||||
}
|
||||
|
||||
// Program entry point: forwards the command line to the example driver and
// uses its result as the process exit status.
int main(int argc, char* argv[])
{
    const int exit_status = RunColumnToImageExample(argc, argv);
    return exit_status;
}
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
|
||||
#include "ck/library/utility/algorithm.hpp"
|
||||
@@ -20,6 +21,7 @@
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
#include "ck/library/utility/host_tensor_generator.hpp"
|
||||
#include "ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp"
|
||||
#include "ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp"
|
||||
|
||||
template <ck::index_t... Is>
|
||||
using S = ck::Sequence<Is...>;
|
||||
@@ -32,7 +34,7 @@ struct ExecutionConfig final
|
||||
{
|
||||
bool do_verification = true;
|
||||
int init_method = 1;
|
||||
bool time_kernel = true;
|
||||
bool time_kernel = false;
|
||||
};
|
||||
|
||||
#define DefaultConvParams \
|
||||
@@ -6,15 +6,16 @@
|
||||
using InDataType = FP32;
|
||||
using OutDataType = FP32;
|
||||
|
||||
using InLayout = ck::tensor_layout::convolution::GNHWC;
|
||||
using ImLayout = ck::tensor_layout::convolution::GNHWC;
|
||||
using ImageToColumnOp = ck::conv_tensor_rearrange_op::ImageToColumn;
|
||||
|
||||
// clang-format off
|
||||
using DeviceImgToColInstance = ck::tensor_operation::device::DeviceImageToColumnImpl
|
||||
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
|
||||
//#####################| Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
|
||||
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
|
||||
//#####################| Spatial| | | | | | | Lengths| Vector|
|
||||
//#####################| | | | | | | | | |
|
||||
< NDimSpatial, InLayout, InDataType, OutDataType, 256, 128, 128, S<16, 16>, 1>;
|
||||
< NDimSpatial, ImLayout, InDataType, OutDataType, 256, 128, 128, S<16, 16>, 1>;
|
||||
// clang-format on
|
||||
|
||||
bool RunImageToColumn(const ExecutionConfig& config, const ck::utils::conv::ConvParam& conv_params)
|
||||
@@ -31,14 +32,14 @@ bool RunImageToColumn(const ExecutionConfig& config, const ck::utils::conv::Conv
|
||||
conv_params.filter_spatial_lengths_.begin(), NDimSpatial, 1, std::multiplies<>());
|
||||
|
||||
const auto in_desc =
|
||||
ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(conv_params);
|
||||
ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<ImLayout>(conv_params);
|
||||
const auto out_desc = HostTensorDescriptor({NDoHoWo, CZYX});
|
||||
|
||||
std::array<ck::index_t, NDimSpatial> input_spatial_lengths{};
|
||||
std::array<ck::index_t, NDimSpatial> filter_spatial_lengths{};
|
||||
std::array<ck::index_t, NDimSpatial> output_spatial_lengths{};
|
||||
std::array<ck::index_t, NDimSpatial + 3> input_g_n_c_wis_strides{};
|
||||
std::array<ck::index_t, 2> output_m_k_strides{};
|
||||
std::array<ck::index_t, NDimSpatial + 3> image_g_n_c_wis_strides{};
|
||||
std::array<ck::index_t, 2> gemm_m_k_strides{};
|
||||
std::array<ck::index_t, NDimSpatial> conv_filter_strides{};
|
||||
std::array<ck::index_t, NDimSpatial> conv_filter_dilations{};
|
||||
std::array<ck::index_t, NDimSpatial> input_left_pads{};
|
||||
@@ -49,8 +50,8 @@ bool RunImageToColumn(const ExecutionConfig& config, const ck::utils::conv::Conv
|
||||
copy(conv_params.input_spatial_lengths_, input_spatial_lengths);
|
||||
copy(conv_params.filter_spatial_lengths_, filter_spatial_lengths);
|
||||
copy(conv_params.output_spatial_lengths_, output_spatial_lengths);
|
||||
copy(in_desc.GetStrides(), input_g_n_c_wis_strides);
|
||||
copy(out_desc.GetStrides(), output_m_k_strides);
|
||||
copy(in_desc.GetStrides(), image_g_n_c_wis_strides);
|
||||
copy(out_desc.GetStrides(), gemm_m_k_strides);
|
||||
copy(conv_params.conv_filter_strides_, conv_filter_strides);
|
||||
copy(conv_params.conv_filter_dilations_, conv_filter_dilations);
|
||||
copy(conv_params.input_left_pads_, input_left_pads);
|
||||
@@ -90,8 +91,8 @@ bool RunImageToColumn(const ExecutionConfig& config, const ck::utils::conv::Conv
|
||||
input_spatial_lengths,
|
||||
filter_spatial_lengths,
|
||||
output_spatial_lengths,
|
||||
input_g_n_c_wis_strides,
|
||||
output_m_k_strides,
|
||||
image_g_n_c_wis_strides,
|
||||
gemm_m_k_strides,
|
||||
conv_filter_strides,
|
||||
conv_filter_dilations,
|
||||
input_left_pads,
|
||||
@@ -114,7 +115,7 @@ bool RunImageToColumn(const ExecutionConfig& config, const ck::utils::conv::Conv
|
||||
if(config.do_verification)
|
||||
{
|
||||
auto ref_image_to_column = ck::tensor_operation::host::
|
||||
ReferenceImageToColumn<NDimSpatial, InLayout, InDataType, OutDataType>();
|
||||
ReferenceImageToColumn<NDimSpatial, ImLayout, InDataType, OutDataType>();
|
||||
|
||||
auto ref_invoker = ref_image_to_column.MakeInvoker();
|
||||
|
||||
Reference in New Issue
Block a user