mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 13:41:24 +00:00
[CK_BUILDER] Add grouped conv fwd ck tile profiler (#3518)
* [BULDER] Add grouped conv fwd ck tile profiler * [CK TILE] Fix grouped conv kernels splitk and double lds * Updates * Fixes * Move to ckProfiler * Fixes * fix * fix * Change instances to empty list by default * fix * fix * Update grouped_convolution_signatures.hpp * Update grouped_convolution_forward_tile_algs.hpp * [CK TILE] Add grouped convolution forward tests (#3556) * [CK TILE] Add grouped convolution forward tests * fix jenkins * fixes * comments fixes * unit test * unit test fix * Move instances outside builder * fix includes * clang format fix * readme fix * fix includes * fixes
This commit is contained in:
@@ -98,27 +98,26 @@ struct ConvTileFactory
|
||||
using GemmPipeline = typename internal::TilePipelineType<
|
||||
BLOCK_GEMM.pipeline_version>::template GemmPipeline<UniversalGemmProblem>;
|
||||
|
||||
using ConvEpilogue = ck_tile::CShuffleEpilogue<ck_tile::CShuffleEpilogueProblem<
|
||||
typename Types::ADataType,
|
||||
typename Types::BDataType,
|
||||
typename Types::DsDataTypes,
|
||||
typename Types::AccDataType,
|
||||
typename Types::EDataType,
|
||||
typename GroupedConvTraitsType::ImplicitGemmDsLayout,
|
||||
typename GroupedConvTraitsType::FixedGemmParams::ELayout,
|
||||
typename Ops::CDEElementwiseOp,
|
||||
BLOCK.per_block.m,
|
||||
BLOCK.per_block.n,
|
||||
BLOCK_GEMM.warps.m,
|
||||
BLOCK_GEMM.warps.n,
|
||||
BLOCK_GEMM.warp_tile.m,
|
||||
BLOCK_GEMM.warp_tile.n,
|
||||
BLOCK_GEMM.warp_tile.k,
|
||||
GroupedConvTraitsType::FixedGemmParams::TransposeC,
|
||||
// TODO:: This template parameter will be moved inside the kernel
|
||||
BLOCK_GEMM.num_wave_groups,
|
||||
GroupedConvTraitsType::FixedGemmParams::FixedVectorSize,
|
||||
SCALAR_PER_VECTOR.c>>;
|
||||
using ConvEpilogue = ck_tile::CShuffleEpilogue<
|
||||
ck_tile::CShuffleEpilogueProblem<typename Types::ADataType,
|
||||
typename Types::BDataType,
|
||||
typename Types::DsDataTypes,
|
||||
typename Types::AccDataType,
|
||||
typename Types::EDataType,
|
||||
typename GroupedConvTraitsType::ImplicitGemmDsLayout,
|
||||
typename GroupedConvTraitsType::FixedGemmParams::ELayout,
|
||||
typename Ops::CDEElementwiseOp,
|
||||
BLOCK.per_block.m,
|
||||
BLOCK.per_block.n,
|
||||
BLOCK_GEMM.warps.m,
|
||||
BLOCK_GEMM.warps.n,
|
||||
BLOCK_GEMM.warp_tile.m,
|
||||
BLOCK_GEMM.warp_tile.n,
|
||||
BLOCK_GEMM.warp_tile.k,
|
||||
GroupedConvTraitsType::FixedGemmParams::TransposeC,
|
||||
BLOCK_GEMM.num_wave_groups,
|
||||
GroupedConvTraitsType::FixedGemmParams::FixedVectorSize,
|
||||
SCALAR_PER_VECTOR.c>>;
|
||||
|
||||
using Instance = typename internal::GroupedConvolutionTileKernel<SIGNATURE,
|
||||
GroupedConvTraitsType,
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "ck_tile/builder/testing/testing_reflect.hpp"
|
||||
#include "ck_tile/builder/testing/filter_extent.hpp"
|
||||
#include "ck_tile/builder/testing/tensor_buffer.hpp"
|
||||
#include "ck_tile/host/convolution_parameter.hpp"
|
||||
#include "ck_tile/builder/testing/tensor_initialization.hpp"
|
||||
#include "ck_tile/builder/testing/tensor_descriptor.hpp"
|
||||
#include "ck_tile/builder/testing/validation.hpp"
|
||||
@@ -93,6 +94,8 @@ struct Args<SIGNATURE>
|
||||
Ops::WeiElementwiseOp b_elementwise_op;
|
||||
Ops::OutElementwiseOp cde_elementwise_op;
|
||||
|
||||
int k_batch = 1;
|
||||
|
||||
/// This function returns the `TensorDescriptor` corresponding to
|
||||
/// the input-tensor of the convolution problem. This can then
|
||||
/// be used to, for example, allocate memory.
|
||||
@@ -169,6 +172,36 @@ struct Args<SIGNATURE>
|
||||
to_vector(this->input_left_pad),
|
||||
to_vector(this->input_right_pad));
|
||||
}
|
||||
|
||||
/// Convert the Args structure into a CK Tile conv_param structure.
|
||||
/// This function is mainly used to be able to use the existing
|
||||
/// CK Tile functionality to obtain tensor descriptors.
|
||||
ck_tile::conv::ConvParam to_ck_tile_conv_param() const
|
||||
{
|
||||
const auto to_vector = [](const auto& extent) {
|
||||
if constexpr(SPATIAL_DIM == 1)
|
||||
return std::vector<ck_tile::index_t>{ck::index_t(extent.width)};
|
||||
else if constexpr(SPATIAL_DIM == 2)
|
||||
return std::vector<ck_tile::index_t>{ck::index_t(extent.height),
|
||||
ck::index_t(extent.width)};
|
||||
else
|
||||
return std::vector<ck_tile::index_t>{ck::index_t(extent.depth),
|
||||
ck::index_t(extent.height),
|
||||
ck::index_t(extent.width)};
|
||||
};
|
||||
|
||||
return ck_tile::conv::ConvParam(SPATIAL_DIM,
|
||||
this->lengths.groups,
|
||||
this->lengths.batch_size,
|
||||
this->lengths.output_channels,
|
||||
this->lengths.input_channels,
|
||||
to_vector(this->lengths.filter),
|
||||
to_vector(this->lengths.image),
|
||||
to_vector(this->filter_strides),
|
||||
to_vector(this->filter_dilation),
|
||||
to_vector(this->input_left_pad),
|
||||
to_vector(this->input_right_pad));
|
||||
}
|
||||
};
|
||||
|
||||
/// @brief `Inputs` specialization for forward convolution.
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "ck_tile/builder/testing/conv_fwd.hpp"
|
||||
#include "ck_tile/host/kernel_launch.hpp"
|
||||
#include "ck_tile/builder/factory/helpers/ck/conv_elementwise_op.hpp"
|
||||
#include <type_traits>
|
||||
#include <array>
|
||||
@@ -87,16 +88,19 @@ concept CkConvInstance = detail::CkConvInstance<Conv, SIGNATURE>;
|
||||
/// @brief `run()` specialization for forward convolution and old CK.
|
||||
///
|
||||
/// @tparam SIGNATURE Forward convolution signature.
|
||||
/// @throws std::runtime_error if the arguments werent actually valid for the
|
||||
/// @throws std::runtime_error if the arguments weren't actually valid for the
|
||||
/// operation. This should be caught and reported by the testing framework.
|
||||
/// @return std::tuple<bool, float> - whether the problem is supported and
|
||||
/// kernel execution time (0.0f if s_conf time_kernel is false).
|
||||
///
|
||||
/// @see run()
|
||||
template <auto SIGNATURE>
|
||||
requires ValidConvSignature<SIGNATURE> && ConvDirectionIsForward<SIGNATURE>
|
||||
void run(CkConvInstance<SIGNATURE> auto& conv,
|
||||
const Args<SIGNATURE>& args,
|
||||
const Inputs<SIGNATURE>& inputs,
|
||||
const Outputs<SIGNATURE>& outputs)
|
||||
std::tuple<bool, float> run(CkConvInstance<SIGNATURE> auto& conv,
|
||||
const Args<SIGNATURE>& args,
|
||||
const Inputs<SIGNATURE>& inputs,
|
||||
const Outputs<SIGNATURE>& outputs,
|
||||
const StreamConfig s_conf = {})
|
||||
{
|
||||
constexpr auto spatial_dim = SIGNATURE.spatial_dim;
|
||||
|
||||
@@ -144,10 +148,10 @@ void run(CkConvInstance<SIGNATURE> auto& conv,
|
||||
|
||||
if(!conv.IsSupportedArgument(ck_args))
|
||||
{
|
||||
throw std::runtime_error("invalid argument");
|
||||
std::cout << "invalid argument" << std::endl;
|
||||
}
|
||||
|
||||
conv.MakeInvoker().Run(ck_args, {});
|
||||
return std::make_tuple(true, conv.MakeInvoker().Run(ck_args, s_conf));
|
||||
}
|
||||
|
||||
} // namespace ck_tile::builder::test
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ck_tile/builder/testing/conv_fwd.hpp"
|
||||
#include "ck_tile/host/kernel_launch.hpp"
|
||||
#include "ck_tile/builder/factory/helpers/ck/conv_elementwise_op.hpp"
|
||||
#include "ck_tile/ops/gemm.hpp"
|
||||
#include "ck_tile/ops/grouped_convolution.hpp"
|
||||
#include <type_traits>
|
||||
#include <array>
|
||||
|
||||
/// This file contains the implementation details for invoking/testing
|
||||
/// grouped convolution operations in CK Tile. The main item is the
|
||||
/// `run()` function, which is the main implementation used to invoke
|
||||
/// CK Tile grouped forward convolution kernels.
|
||||
|
||||
namespace ck_tile::builder::test {
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// @brief Concept for checking whether this is the CK Tile convolution
|
||||
/// implementation.
|
||||
///
|
||||
/// This is the same as `::ck_tile::builder::test::CkConvInstance`, except
|
||||
/// with some utility aliases. For that reason, its moved to this detail
|
||||
/// namespace.
|
||||
template <typename Conv, auto SIGNATURE>
|
||||
concept CkTileConvInstance = requires(Conv&) {
|
||||
{ Conv::BlockSize() };
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
|
||||
/// @brief Concept for checking whether a convolution is invoked like CK Tile.
|
||||
///
|
||||
/// This concept is used to tell whether a convolution implementation is
|
||||
/// likely to be an "CK Tile" implementation - that is, whether we should
|
||||
/// invoke it as an CK Tile kernel. This is mainly used with `run()` to
|
||||
/// differentiate which implementation that should be invoked.
|
||||
///
|
||||
/// - SIGNATURE is the operation signature.
|
||||
/// - Conv is a convolution instance created by the CK Builder API.
|
||||
template <typename Conv, auto SIGNATURE>
|
||||
concept CkTileConvInstance = detail::CkTileConvInstance<Conv, SIGNATURE>;
|
||||
|
||||
/// @brief `run()` specialization for forward convolution and CK Tile.
|
||||
///
|
||||
/// @tparam SIGNATURE Forward convolution signature.
|
||||
/// @throws std::runtime_error if the arguments weren't actually valid for the
|
||||
/// operation. This should be caught and reported by the testing framework.
|
||||
/// @return std::tuple<bool, float> - whether the problem is supported and
|
||||
/// kernel execution time (0.0f if s_conf time_kernel is false).
|
||||
///
|
||||
/// @see run()
|
||||
template <auto SIGNATURE>
|
||||
requires ValidConvSignature<SIGNATURE> && ConvDirectionIsForward<SIGNATURE>
|
||||
std::tuple<bool, float> run(CkTileConvInstance<SIGNATURE> auto& conv,
|
||||
const Args<SIGNATURE>& args,
|
||||
const Inputs<SIGNATURE>& inputs,
|
||||
const Outputs<SIGNATURE>& outputs,
|
||||
const ck_tile::stream_config s_conf = {})
|
||||
{
|
||||
using Conv = std::remove_reference_t<decltype(conv)>;
|
||||
const auto param = args.to_ck_tile_conv_param();
|
||||
|
||||
ck_tile::GroupedConvFwdHostArgs<> host_args(
|
||||
param, inputs.input, inputs.weight, {}, outputs.output, args.k_batch);
|
||||
|
||||
auto kargs = Conv::MakeKernelArgs(host_args);
|
||||
|
||||
const dim3 grids = Conv::GridSize(kargs);
|
||||
const dim3 blocks = Conv::BlockSize();
|
||||
|
||||
if(!Conv::IsSupportedArgument(kargs))
|
||||
{
|
||||
std::cout << "Not supported!";
|
||||
return std::make_tuple(false, 0.f);
|
||||
}
|
||||
|
||||
constexpr index_t minimum_occupancy =
|
||||
Conv::GemmPipeline::Scheduler == ck_tile::GemmPipelineScheduler::Intrawave ? 1 : 2;
|
||||
|
||||
return std::make_tuple(
|
||||
true,
|
||||
ck_tile::launch_kernel(
|
||||
s_conf, ck_tile::make_kernel<minimum_occupancy>(conv, grids, blocks, 0, kargs)));
|
||||
}
|
||||
|
||||
} // namespace ck_tile::builder::test
|
||||
@@ -62,6 +62,8 @@ concept RefConvInstance = requires(Conv& conv,
|
||||
/// @throws std::runtime_error if the arguments weren't actually valid for the
|
||||
/// operation. This should be caught and reported by the testing framework.
|
||||
///
|
||||
/// @return std::tuple<bool, float> - whether the problem is supported and
|
||||
/// kernel execution time (0.0f for reference).
|
||||
/// @see run()
|
||||
template <auto SIGNATURE>
|
||||
requires ValidConvSignature<SIGNATURE> &&
|
||||
@@ -69,10 +71,10 @@ template <auto SIGNATURE>
|
||||
// for now, just concern outselves with reference and see when the
|
||||
// rest of the bwd/weight plumbing is there.
|
||||
ConvDirectionIsForward<SIGNATURE>
|
||||
void run(RefConvInstance<SIGNATURE> auto& conv,
|
||||
const Args<SIGNATURE>& args,
|
||||
const Inputs<SIGNATURE>& inputs,
|
||||
const Outputs<SIGNATURE>& outputs)
|
||||
std::tuple<bool, float> run(RefConvInstance<SIGNATURE> auto& conv,
|
||||
const Args<SIGNATURE>& args,
|
||||
const Inputs<SIGNATURE>& inputs,
|
||||
const Outputs<SIGNATURE>& outputs)
|
||||
{
|
||||
// We don't want to compute the output dims manually, just get
|
||||
// them via the existing infrastructure
|
||||
@@ -86,15 +88,27 @@ void run(RefConvInstance<SIGNATURE> auto& conv,
|
||||
for(auto right_pad : param.input_right_pads_)
|
||||
{
|
||||
if(right_pad != 0)
|
||||
throw std::runtime_error("TODO: Support right pad in reference conv");
|
||||
{
|
||||
std::cout << "TODO: Support right pad in reference conv" << std::endl;
|
||||
return std::make_tuple(false, 0.0f);
|
||||
}
|
||||
}
|
||||
|
||||
if(!args.make_input_descriptor().is_packed())
|
||||
throw std::runtime_error("TODO: Support non-packed input tensor in reference conv");
|
||||
{
|
||||
std::cout << "TODO: Support non-packed input tensor in reference conv" << std::endl;
|
||||
return std::make_tuple(false, 0.0f);
|
||||
}
|
||||
if(!args.make_weight_descriptor().is_packed())
|
||||
throw std::runtime_error("TODO: Support non-packed weight tensor in reference conv");
|
||||
{
|
||||
std::cout << "TODO: Support non-packed weight tensor in reference conv" << std::endl;
|
||||
return std::make_tuple(false, 0.0f);
|
||||
}
|
||||
if(!args.make_output_descriptor().is_packed())
|
||||
throw std::runtime_error("TODO: Support non-packed output tensor in reference conv");
|
||||
{
|
||||
std::cout << "TODO: Support non-packed output tensor in reference conv" << std::endl;
|
||||
return std::make_tuple(false, 0.0f);
|
||||
}
|
||||
|
||||
conv.Run(inputs.input,
|
||||
inputs.weight,
|
||||
@@ -109,6 +123,7 @@ void run(RefConvInstance<SIGNATURE> auto& conv,
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_);
|
||||
return std::make_tuple(true, 0.0f);
|
||||
}
|
||||
|
||||
} // namespace ck_tile::builder::test
|
||||
|
||||
@@ -34,4 +34,25 @@ struct FilterExtent<3>
|
||||
size_t depth = 1;
|
||||
};
|
||||
|
||||
template <int SPATIAL_DIM>
|
||||
inline FilterExtent<SPATIAL_DIM> filter_extent_from_vector(const std::vector<std::size_t>& vec);
|
||||
|
||||
template <>
|
||||
inline FilterExtent<1> filter_extent_from_vector<1>(const std::vector<std::size_t>& vec)
|
||||
{
|
||||
return FilterExtent<1>{.width = vec[0]};
|
||||
}
|
||||
|
||||
template <>
|
||||
inline FilterExtent<2> filter_extent_from_vector<2>(const std::vector<std::size_t>& vec)
|
||||
{
|
||||
return FilterExtent<2>{.width = vec[1], .height = vec[0]};
|
||||
}
|
||||
|
||||
template <>
|
||||
inline FilterExtent<3> filter_extent_from_vector<3>(const std::vector<std::size_t>& vec)
|
||||
{
|
||||
return FilterExtent<3>{.width = vec[2], .height = vec[1], .depth = vec[0]};
|
||||
}
|
||||
|
||||
} // namespace ck_tile::builder::test
|
||||
|
||||
@@ -418,6 +418,10 @@ struct TensorDescriptor
|
||||
size_t x = 1;
|
||||
for(size_t i = 0; i < RANK; ++i)
|
||||
{
|
||||
if(lengths[indices[i]] == 1)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if(strides[indices[i]] != x)
|
||||
return false;
|
||||
|
||||
@@ -443,6 +447,15 @@ struct TensorDescriptor
|
||||
return TensorDescriptor<DT, 1>(lengths, strides);
|
||||
}
|
||||
|
||||
/// @brief Print tensor descriptor details.
|
||||
///
|
||||
/// Print tensor descriptor details - lengths and strides.
|
||||
friend std::ostream& operator<<(std::ostream& os, const TensorDescriptor<DT, RANK>& tensor_desc)
|
||||
{
|
||||
os << tensor_desc.inner_descriptor_;
|
||||
return os;
|
||||
}
|
||||
|
||||
private:
|
||||
ck_tile::HostTensorDescriptor inner_descriptor_;
|
||||
};
|
||||
|
||||
@@ -317,13 +317,17 @@ ValidationReport validate(const Args<SIGNATURE>& args,
|
||||
/// @param inputs The input tensor data. Will not be modified by this function.
|
||||
/// @param outputs The output tensor data. The contents will be overwritten by
|
||||
/// this function.
|
||||
/// @param s_conf Stream config used to launch kernel.
|
||||
/// @return std::tuple<bool, float> - whether the problem is supported and
|
||||
/// kernel execution time (0.0f if s_conf time_kernel is false).
|
||||
///
|
||||
/// @note This function is explicitly deleted to generate compile errors
|
||||
/// for missing implementations.
|
||||
template <auto SIGNATURE, typename Operation>
|
||||
void run(Operation& operation,
|
||||
const Args<SIGNATURE>& args,
|
||||
const Inputs<SIGNATURE>& inputs,
|
||||
const Outputs<SIGNATURE>& outputs) = delete;
|
||||
template <auto SIGNATURE, typename Operation, typename StreamConf>
|
||||
std::tuple<bool, float> run(Operation& operation,
|
||||
const Args<SIGNATURE>& args,
|
||||
const Inputs<SIGNATURE>& inputs,
|
||||
const Outputs<SIGNATURE>& outputs,
|
||||
const StreamConf s_conf = {}) = delete;
|
||||
|
||||
} // namespace ck_tile::builder::test
|
||||
|
||||
Reference in New Issue
Block a user