mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 21:21:22 +00:00
Add examples of Conv + reduction (data type: int4, int8, bf16, fp16, fp32) (#380)
* Refactor the design of DeviceGemmMultipleDMultipleR_Xdl_CShuffle * Add 'DeviceGroupedConvFwdMultipleDMultipleR' interface * Add DeviceGroupedConvFwdMultipleDMultipleR_Xdl_CShuffle * Remove 'GridwiseConvFwdMultipleDMultipleR_xdl_cshuffle' * Add 'TransformConvFwdToGemm<>' utility class (from Chao) * Use 'TransformConvFwdToGemm<>' to shorten code * Fix ill-formed method declaration * Re-implement MakeRGridDescriptor_M() function * Change problem description * Use macro to define layout types * Define K-reduced output tensor layout types * Let user decide R output tensor layout * Rename variables * Add padding to the reduced output tensor if necessary * Extract common code as helper method * Remove debug message * Add missing include directive * Add partial fp16 Conv + Reduction example * Add example verification code for 2D Conv problem * Use type alias to simplify code * Share code across different-dimension Conv problems * Rename file/functions from run_conv_fwd* to run_convnd_fwd* * Make example code more verbose * Add code to support 1D & 3D Conv + Reduction on host * Add more examples for data type: bf16, fp32 * Add example for int8 * Add custom target to group examples * Use more general custom target name * Change the description in error message * Disable testing for examples other than fp32 * Add example for int4 (just copy from int8) * Fix wrong data type * Use larger data type for intermediate tensors * Finish int4 example * Undefine macro PP_DEFINE_LAYOUT_TYPE() after use * Use named variables to replace magic numbers * Remove debug messages * Use same A/B data type for host Conv in int4 example * Add check for the 'RLayout' type argument * Group same-dim-layouts together in 'LayoutSetting<>' * Add 'final' specifier to utility classes * Use different initialization method for examples * Remove macro PP_DEFINE_LAYOUT_TYPE() * Default use init_method=1 for all 
examples * Remove never-used code * Remove confusing out-of-date comments * clean Co-authored-by: Chao Liu <chao.liu2@amd.com> Co-authored-by: Chao Liu <lc.roy86@gmail.com>
This commit is contained in:
@@ -0,0 +1,77 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
#include <memory>
#include <vector>

#include "ck/tensor_operation/gpu/device/device_base.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
|
||||
// Grouped Convolution Forward:
// input : input image A[G, N, C, Hi, Wi],
// input : weight B[G, K, C, Y, X],
// input : D0[G, N, K, Ho, Wo], D1[G, N, K, Ho, Wo], ...
// output : output image E[G, N, K, Ho, Wo]
// output : R0[G, N, Ho, Wo], R1[G, N, Ho, Wo], ...
// C = a_op(A) * b_op(B)
// E = cde_op(C, D0, D1, ...)
// Q0 = reduce0(q_op0(E)), Q1 = reduce1(q_op1(E)), ...
// R0 = r_op0(Q0), R1 = r_op1(Q1), ...
// Assume:
// D0, D1, ... and E have the same layout
//
// Abstract device-operation interface: a concrete implementation derives from
// this class and provides both pure-virtual factory methods below.
template <index_t NDimSpatial,            // number of spatial dimensions
          typename ALayout,               // input image layout
          typename BLayout,               // weight layout
          typename DELayout,              // layout shared by the D tensors and E
          typename RLayout,               // layout of the K-reduced R tensors
          typename ADataType,
          typename BDataType,
          typename DsDataType,            // tuple of D-tensor data types
          typename EDataType,
          typename RsDataType,            // tuple of R-tensor data types
          typename AElementwiseOperation,
          typename BElementwiseOperation,
          typename CDEElementwiseOperation,
          typename QsElementwiseOperation, // q_op applied to E before each reduction
          typename RsElementwiseOperation> // r_op applied to each reduced result Q
struct DeviceGroupedConvFwdMultipleDMultipleR : public BaseOperator
{
    // Tensor counts are derived from the size of the data-type tuples.
    static constexpr index_t NumDTensor = DsDataType::Size();
    static constexpr index_t NumRTensor = RsDataType::Size();

    // Build a type-erased argument object for a launch.
    //
    // Pointers:
    //   p_a, p_b    - input image / weight device buffers
    //   p_ds        - NumDTensor auxiliary input buffers (same layout as E)
    //   p_e         - output image buffer
    //   p_rs        - NumRTensor reduced-output buffers
    // Descriptors (lengths/strides follow the dimension order in the names,
    // e.g. a_g_n_c_wis_* = [G, N, C, spatial...]):
    //   a_*, b_*, ds_*, e_*, r_* - per-tensor lengths and strides; note the
    //   R tensors have NDimSpatial + 2 dims (no K dimension)
    // Conv parameters:
    //   conv_filter_strides/dilations, input_left/right_pads - per spatial dim
    // Element-wise operators:
    //   a/b/cde_element_op      - applied as in the formulas above
    //   qs/rs_element_op        - per-reduction pre/post operators
    virtual std::unique_ptr<BaseArgument> MakeArgumentPointer(
        const void* p_a,
        const void* p_b,
        const std::array<const void*, NumDTensor>& p_ds,
        void* p_e,
        std::array<void*, NumRTensor> p_rs,
        const std::array<index_t, NDimSpatial + 3>& a_g_n_c_wis_lengths,
        const std::array<index_t, NDimSpatial + 3>& a_g_n_c_wis_strides,
        const std::array<index_t, NDimSpatial + 3>& b_g_k_c_xs_lengths,
        const std::array<index_t, NDimSpatial + 3>& b_g_k_c_xs_strides,
        const std::array<std::array<index_t, NDimSpatial + 3>, NumDTensor>& ds_g_n_k_wos_lengths,
        const std::array<std::array<index_t, NDimSpatial + 3>, NumDTensor>& ds_g_n_k_wos_strides,
        const std::array<index_t, NDimSpatial + 3>& e_g_n_k_wos_lengths,
        const std::array<index_t, NDimSpatial + 3>& e_g_n_k_wos_strides,
        const std::array<index_t, NDimSpatial + 2>& r_g_n_wos_lengths,
        const std::array<index_t, NDimSpatial + 2>& r_g_n_wos_strides,
        const std::array<index_t, NDimSpatial>& conv_filter_strides,
        const std::array<index_t, NDimSpatial>& conv_filter_dilations,
        const std::array<index_t, NDimSpatial>& input_left_pads,
        const std::array<index_t, NDimSpatial>& input_right_pads,
        const AElementwiseOperation& a_element_op,
        const BElementwiseOperation& b_element_op,
        const CDEElementwiseOperation& cde_element_op,
        const QsElementwiseOperation& qs_element_op,
        const RsElementwiseOperation& rs_element_op) = 0;

    // Build the invoker that launches the kernel for an argument object.
    virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0;
};
|
||||
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
File diff suppressed because it is too large
Load Diff
@@ -93,7 +93,7 @@ struct GNDHWC : public BaseTensorLayout
|
||||
};
|
||||
|
||||
// input tensor
|
||||
// packed GNWC/GNHWC/GNDHWC
|
||||
// packed NWGC/NHWGC/NDHWGC
|
||||
struct NWGC : public BaseTensorLayout
|
||||
{
|
||||
static constexpr const char* name = "NWGC";
|
||||
@@ -330,6 +330,54 @@ struct G_NDHW_K : public BaseTensorLayout
|
||||
static constexpr const char* name = "G_NDHW_K";
|
||||
};
|
||||
|
||||
// K-reduced output tensor (packed), group-major order: GNW/GNHW/GNDHW
// dims: [G, N, Wo] (1D spatial)
struct GNW : public BaseTensorLayout
{
    static constexpr const char* name = "GNW";
};
|
||||
|
||||
// K-reduced output tensor (packed), dims: [G, N, Ho, Wo] (2D spatial)
struct GNHW : public BaseTensorLayout
{
    static constexpr const char* name = "GNHW";
};
|
||||
|
||||
// K-reduced output tensor (packed), dims: [G, N, Do, Ho, Wo] (3D spatial)
struct GNDHW : public BaseTensorLayout
{
    static constexpr const char* name = "GNDHW";
};
|
||||
|
||||
// K-reduced output tensor (packed), group-innermost order: NWG/NHWG/NDHWG
// dims: [N, Wo, G] (1D spatial)
struct NWG : public BaseTensorLayout
{
    static constexpr const char* name = "NWG";
};
|
||||
|
||||
// K-reduced output tensor (packed), dims: [N, Ho, Wo, G] (2D spatial)
struct NHWG : public BaseTensorLayout
{
    static constexpr const char* name = "NHWG";
};
|
||||
|
||||
// K-reduced output tensor (packed), dims: [N, Do, Ho, Wo, G] (3D spatial)
struct NDHWG : public BaseTensorLayout
{
    static constexpr const char* name = "NDHWG";
};
|
||||
|
||||
// K-reduced output tensor (strided, i.e. arbitrary user-supplied strides):
// G_NW/G_NHW/G_NDHW; dims: [G, N, Wo] (1D spatial)
struct G_NW : public BaseTensorLayout
{
    static constexpr const char* name = "G_NW";
};
|
||||
|
||||
// K-reduced output tensor (strided), dims: [G, N, Ho, Wo] (2D spatial)
struct G_NHW : public BaseTensorLayout
{
    static constexpr const char* name = "G_NHW";
};
|
||||
|
||||
// K-reduced output tensor (strided), dims: [G, N, Do, Ho, Wo] (3D spatial)
struct G_NDHW : public BaseTensorLayout
{
    static constexpr const char* name = "G_NDHW";
};
|
||||
|
||||
} // namespace convolution
|
||||
|
||||
template <
|
||||
|
||||
Reference in New Issue
Block a user