mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
Reorganize files, Part 1 (#119)
* delete obsolete files * move files * build * update cmake * update cmake * fix build * reorg examples * update cmake for example and test
This commit is contained in:
33
include/ck/tensor_description/cluster_descriptor.hpp
Normal file
33
include/ck/tensor_description/cluster_descriptor.hpp
Normal file
@@ -0,0 +1,33 @@
|
||||
#ifndef CK_CLUSTER_DESCRIPTOR_HPP
#define CK_CLUSTER_DESCRIPTOR_HPP

#include "common_header.hpp"
#include "tensor_adaptor.hpp"

namespace ck {

// Build a single-stage tensor adaptor that merges all dimensions of "lengths"
// (after permuting them by "order") into one linear cluster index.
//
// Lengths:      per-dimension lengths (compile-time sized container).
// ArrangeOrder: new-to-old permutation applied before the merge; defaults to
//               the identity order 0, 1, ..., N-1.
template <typename Lengths,
          typename ArrangeOrder = typename arithmetic_sequence_gen<0, Lengths::Size(), 1>::type>
__host__ __device__ constexpr auto make_cluster_descriptor(
    const Lengths& lengths,
    ArrangeOrder order = typename arithmetic_sequence_gen<0, Lengths::Size(), 1>::type{})
{
    constexpr index_t ndim_low = Lengths::Size();

    // permute the lengths into the requested arrangement
    const auto reordered_lengths = container_reorder_given_new2old(lengths, order);

    const auto low_lengths = generate_tuple(
        [&](auto idim_low) { return reordered_lengths[idim_low]; }, Number<ndim_low>{});

    // a single merge transform collapses every (reordered) dimension into one
    const auto transform = make_merge_transform(low_lengths);

    constexpr auto low_dim_old_top_ids = ArrangeOrder{};
    constexpr auto up_dim_new_top_ids  = Sequence<0>{};

    return make_single_stage_tensor_adaptor(
        make_tuple(transform), make_tuple(low_dim_old_top_ids), make_tuple(up_dim_new_top_ids));
}

} // namespace ck
#endif
1953
include/ck/tensor_description/multi_index_transform.hpp
Normal file
1953
include/ck/tensor_description/multi_index_transform.hpp
Normal file
File diff suppressed because it is too large
Load Diff
129
include/ck/tensor_description/multi_index_transform_helper.hpp
Normal file
129
include/ck/tensor_description/multi_index_transform_helper.hpp
Normal file
@@ -0,0 +1,129 @@
|
||||
#ifndef CK_MULTI_INDEX_TRANSFORM_HELPER_HPP
#define CK_MULTI_INDEX_TRANSFORM_HELPER_HPP

#include "common_header.hpp"
#include "multi_index_transform.hpp"

namespace ck {

// Factory helpers wrapping the multi-index transform types so callers get
// template-argument deduction instead of spelling template parameters.

// Deduction helper for PassThrough<LowLength>.
template <typename LowLength>
__host__ __device__ constexpr auto make_pass_through_transform(const LowLength& low_length)
{
    return PassThrough<LowLength>{low_length};
}

// Deduction helper for Pad<...>. SkipIsValidCheck is forwarded as a template flag.
template <typename LowLength, typename LeftPad, typename RightPad, bool SkipIsValidCheck = false>
__host__ __device__ constexpr auto
make_pad_transform(const LowLength& low_length,
                   const LeftPad& left_pad,
                   const RightPad& right_pad,
                   integral_constant<bool, SkipIsValidCheck> = integral_constant<bool, false>{})
{
    return Pad<LowLength, LeftPad, RightPad, SkipIsValidCheck>{low_length, left_pad, right_pad};
}

// Deduction helper for LeftPad<...>.
template <typename LowLength, typename LeftPadLength, bool SkipIsValidCheck = false>
__host__ __device__ constexpr auto make_left_pad_transform(
    const LowLength& low_length,
    const LeftPadLength& left_pad,
    integral_constant<bool, SkipIsValidCheck> = integral_constant<bool, false>{})
{
    return LeftPad<LowLength, LeftPadLength, SkipIsValidCheck>{low_length, left_pad};
}

// Deduction helper for RightPad<...>.
template <typename LowLength, typename RightPadLength, bool SkipIsValidCheck = false>
__host__ __device__ constexpr auto make_right_pad_transform(
    const LowLength& low_length,
    const RightPadLength& right_pad,
    integral_constant<bool, SkipIsValidCheck> = integral_constant<bool, false>{})
{
    return RightPad<LowLength, RightPadLength, SkipIsValidCheck>{low_length, right_pad};
}

// Deduction helper for Embed<...>; requires one coefficient per upper length.
template <typename UpLengths,
          typename Coefficients,
          typename enable_if<UpLengths::Size() == Coefficients::Size(), bool>::type = false>
__host__ __device__ constexpr auto make_embed_transform(const UpLengths& up_lengths,
                                                        const Coefficients& coefficients)
{
    return Embed<UpLengths, Coefficients>{up_lengths, coefficients};
}

// Merge several lower dimensions into one upper dimension. The concrete
// implementation is chosen at build time by CK_EXPERIMENTAL_MERGE_USE_MAGIC_DIVISION.
template <typename LowLengths>
__host__ __device__ constexpr auto make_merge_transform(const LowLengths& low_lengths)
{
#if CK_EXPERIMENTAL_MERGE_USE_MAGIC_DIVISION
    return make_merge_transform_v2_magic_division(low_lengths);
#else
    return make_merge_transform_v1_carry_check(low_lengths);
#endif
}

// Merge variant: Merge_v1_carry_check.
template <typename LowLengths>
__host__ __device__ constexpr auto
make_merge_transform_v1_carry_check(const LowLengths& low_lengths)
{
    return Merge_v1_carry_check<LowLengths>{low_lengths};
}

// Merge variant: Merge_v2_magic_division (v2r2 kept as a disabled alternative).
template <typename LowLengths>
__host__ __device__ constexpr auto
make_merge_transform_v2_magic_division(const LowLengths& low_lengths)
{
#if 1
    return Merge_v2_magic_division<LowLengths>{low_lengths};
#else
    return Merge_v2r2_magic_division<LowLengths>{low_lengths};
#endif
}

// Merge variant: Merge_v3_division_mod.
template <typename LowLengths>
__host__ __device__ constexpr auto
make_merge_transform_v3_division_mod(const LowLengths& low_lengths)
{
    return Merge_v3_division_mod<LowLengths>{low_lengths};
}

// Deduction helper for UnMerge<...> (split one dimension into several).
template <typename UpLengths, bool Use24BitIntegerCalculation = false>
__host__ __device__ constexpr auto make_unmerge_transform(
    const UpLengths& up_lengths,
    integral_constant<bool, Use24BitIntegerCalculation> = integral_constant<bool, false>{})
{
    return UnMerge<UpLengths, Use24BitIntegerCalculation>{up_lengths};
}

// Deduction helper for Freeze<LowerIndex> (pin a lower dimension to a fixed index).
template <typename LowerIndex>
__host__ __device__ constexpr auto make_freeze_transform(const LowerIndex& low_idx)
{
    return Freeze<LowerIndex>{low_idx};
}

// Deduction helper for Insert<UpperIndex>.
template <typename UpperIndex>
__host__ __device__ constexpr auto make_insert_transform(const UpperIndex& up_idx)
{
    return Insert<UpperIndex>{up_idx};
}

// Deduction helper for Slice<...> over [slice_begin, slice_end).
template <typename LowLength, typename SliceBegin, typename SliceEnd>
__host__ __device__ constexpr auto make_slice_transform(const LowLength& low_length,
                                                        const SliceBegin& slice_begin,
                                                        const SliceEnd& slice_end)
{
    return Slice<LowLength, SliceBegin, SliceEnd>{low_length, slice_begin, slice_end};
}

// Deduction helper for Vectorize<VectorSize, UpLength>.
template <typename VectorSize, typename UpLength>
__host__ __device__ constexpr auto make_vectorize_transform(const VectorSize& vector_size,
                                                            const UpLength& up_length)
{
    return Vectorize<VectorSize, UpLength>{vector_size, up_length};
}

// Deduction helper for Modulo<Modulus, UpLength>.
template <typename Modulus, typename UpLength>
__host__ __device__ constexpr auto make_modulo_transform(const Modulus& modulus,
                                                         const UpLength& up_length)
{
    return Modulo<Modulus, UpLength>{modulus, up_length};
}

} // namespace ck
#endif
477
include/ck/tensor_description/tensor_adaptor.hpp
Normal file
477
include/ck/tensor_description/tensor_adaptor.hpp
Normal file
@@ -0,0 +1,477 @@
|
||||
#ifndef CK_TENSOR_ADAPTOR_HPP
#define CK_TENSOR_ADAPTOR_HPP

#include "common_header.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"

namespace ck {

// Chain of multi-index transforms mapping "top" dimensions to "bottom"
// dimensions through a shared pool of hidden dimension ids.
//
// Transforms:               Tuple<transforms...>
// LowerDimensionHiddenIdss: Tuple<Sequence<...>, ...> - hidden ids of each transform's lower dims
// UpperDimensionHiddenIdss: Tuple<Sequence<...>, ...> - hidden ids of each transform's upper dims
// BottomDimensionHiddenIds: Sequence<...>             - hidden ids exposed at the bottom
// TopDimensionHiddenIds:    Sequence<...>             - hidden ids exposed at the top
template <typename Transforms,
          typename LowerDimensionHiddenIdss,
          typename UpperDimensionHiddenIdss,
          typename BottomDimensionHiddenIds,
          typename TopDimensionHiddenIds>
struct TensorAdaptor
{
    __host__ __device__ static constexpr index_t GetNumOfTransform() { return Transforms::Size(); }

    __host__ __device__ constexpr const auto& GetTransforms() const { return transforms_; }

    __host__ __device__ static constexpr auto GetLowerDimensionHiddenIdss()
    {
        return LowerDimensionHiddenIdss{};
    }

    __host__ __device__ static constexpr auto GetUpperDimensionHiddenIdss()
    {
        return UpperDimensionHiddenIdss{};
    }

    __host__ __device__ static constexpr auto GetTopDimensionHiddenIds()
    {
        return TopDimensionHiddenIds{};
    }

    __host__ __device__ static constexpr auto GetBottomDimensionHiddenIds()
    {
        return BottomDimensionHiddenIds{};
    }

    // Product of all top-dimension lengths; each length is looked up on the
    // transform that owns the corresponding hidden id.
    __host__ __device__ static constexpr auto InitializeElementSize(const Transforms& transforms)
    {
        const auto lengths = generate_tuple(
            [&](auto idim_top) {
                constexpr auto tmp = GetTransformAndItsUpperDimension(idim_top);

                constexpr index_t itran   = tmp[Number<0>{}];
                constexpr index_t idim_up = tmp[Number<1>{}];
                constexpr bool found      = tmp[Number<2>{}];

                static_assert(found == true,
                              "wrong! not found matching transformation and upper-dimension");

                const auto length =
                    transforms[Number<itran>{}].GetUpperLengths()[Number<idim_up>{}];

                return length;
            },
            Number<ndim_top_>{});

        // TODO: make container_reduce support tuple of Number and index_t
        return container_reduce(lengths, math::multiplies{}, Number<1>{});
    }

    // For top dimension IDim, find the transform and the upper-dimension slot
    // (within that transform) whose hidden id matches it.
    // Returns (transform index, upper-dim index, found flag).
    template <index_t IDim>
    __host__ __device__ static constexpr auto GetTransformAndItsUpperDimension(Number<IDim>)
    {
        constexpr auto idim_top = Number<IDim>{};

        constexpr index_t idim_hidden = TopDimensionHiddenIds::At(idim_top);

        index_t itran_found   = 0;
        index_t idim_up_found = 0;
        bool found            = false;

        static_for<0, ntransform_, 1>{}([&](auto itran) {
            constexpr auto up_dim_ids = UpperDimensionHiddenIdss{}[itran];

            static_for<0, up_dim_ids.Size(), 1>{}([&](auto idim_up) {
                if constexpr(up_dim_ids[idim_up] == idim_hidden)
                {
                    itran_found   = itran;
                    idim_up_found = idim_up;
                    found         = true;
                }
            });
        });

        return make_tuple(itran_found, idim_up_found, found);
    }

    __host__ __device__ static constexpr index_t GetNumOfBottomDimension()
    {
        return BottomDimensionHiddenIds::Size();
    }

    __host__ __device__ static constexpr index_t GetNumOfTopDimension()
    {
        return TopDimensionHiddenIds::Size();
    }

    // Count of distinct hidden dimension ids referenced by any transform
    // (union of all lower and upper id sequences, deduplicated).
    __host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
    {
        constexpr auto all_low_dim_ids = unpack(
            [](auto&&... xs) constexpr { return merge_sequences(xs...); },
            LowerDimensionHiddenIdss{});

        constexpr auto all_up_dim_ids = unpack(
            [](auto&&... xs) constexpr { return merge_sequences(xs...); },
            UpperDimensionHiddenIdss{});

        constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);

        using unique_sort_all_dim_ids = typename sequence_unique_sort<decltype(all_dim_ids),
                                                                      math::less<index_t>,
                                                                      math::equal<index_t>>::type;

        return unique_sort_all_dim_ids::Size();
    }

    constexpr static index_t ntransform_  = GetNumOfTransform();
    constexpr static index_t ndim_hidden_ = GetNumOfHiddenDimension();
    constexpr static index_t ndim_bottom_ = GetNumOfBottomDimension();
    constexpr static index_t ndim_top_    = GetNumOfTopDimension();

    using HiddenIndex = MultiIndex<ndim_hidden_>;
    using BottomIndex = MultiIndex<ndim_bottom_>;
    using TopIndex    = MultiIndex<ndim_top_>;

    // may be index_t or Number<>
    using ElementSize = remove_cv_t<decltype(InitializeElementSize(Transforms{}))>;

    public:
    __host__ __device__ constexpr TensorAdaptor() = default;

    __host__ __device__ constexpr TensorAdaptor(const Transforms& transforms)
        : transforms_{transforms}, element_size_{InitializeElementSize(transforms)}
    {
        static_assert(Transforms::Size() == ntransform_ &&
                          LowerDimensionHiddenIdss::Size() == ntransform_ &&
                          UpperDimensionHiddenIdss::Size() == ntransform_,
                      "wrong! inconsistent # of transformations");

        // TODO check dependency of dimensions is valid
    }

    __host__ __device__ constexpr auto GetElementSize() const { return element_size_; }

#if 0 // debug
    template <index_t I>
    __host__ __device__ constexpr index_t GetTopDimensionLength(Number<I> idim) const
    {
        // TODO: not implemented
    }

    template <index_t I>
    __host__ __device__ constexpr index_t GetBottomDimensionLength(Number<I> idim) const
    {
        // TODO: not implemented
    }
#endif

    // Propagate a top index down through every transform (walked in reverse
    // order) and return the resulting bottom index.
    template <typename TopIdx>
    __host__ __device__ constexpr auto CalculateBottomIndex(const TopIdx& idx_top) const
    {
        static_assert(TopIdx::Size() == TopDimensionHiddenIds::Size(),
                      "wrong! # of dimension inconsistent");

        constexpr index_t ntransform  = GetNumOfTransform();
        constexpr index_t ndim_hidden = GetNumOfHiddenDimension();

        MultiIndex<ndim_hidden> idx_hidden;

        // initialize uppest index
        set_container_subset(idx_hidden, GetTopDimensionHiddenIds(), idx_top);

        // calculate hidden index
        static_for<ntransform, 0, -1>{}([&](auto itran_p1) {
            auto itran              = itran_p1 - Number<1>{};
            const auto& tran        = GetTransforms().At(itran);
            constexpr auto dims_low = GetLowerDimensionHiddenIdss().At(itran);
            constexpr auto dims_up  = GetUpperDimensionHiddenIdss().At(itran);

            const auto idx_up = get_container_subset(idx_hidden, dims_up);

            MultiIndex<dims_low.Size()> idx_low;

            tran.CalculateLowerIndex(idx_low, idx_up);

            set_container_subset(idx_hidden, dims_low, idx_low);
        });

        return get_container_subset(idx_hidden, BottomDimensionHiddenIds{});
    }

    // True iff every transform and the element size are compile-time known.
    __host__ __device__ static constexpr bool IsKnownAtCompileTime()
    {
        bool is_known = true;

        static_for<0, Transforms::Size(), 1>{}([&](auto i) {
            is_known &= remove_cvref_t<decltype(Transforms{}[i])>::IsKnownAtCompileTime();
        });

        return is_known && is_known_at_compile_time<ElementSize>::value;
    }

    // Debug dump (host-side printf formatting; device printf on GPU).
    __host__ __device__ void Print() const
    {
        printf("{");
        printf("TensorAdaptor, ");
        static_for<0, ntransform_, 1>{}([&](auto i) {
            printf("transforms: ");
            transforms_[i].Print();
            printf("LowerDimensionHiddenIds:");
            LowerDimensionHiddenIdss{}.At(i).Print();
            printf("UpperDimensionHiddenIds:");
            UpperDimensionHiddenIdss{}.At(i).Print();
        });

        printf("BottomDimensionHiddenIds:");
        BottomDimensionHiddenIds::Print();
        printf("TopDimensionHiddenIds:");
        TopDimensionHiddenIds::Print();

        printf("}");
    }

    private:
    Transforms transforms_;
    ElementSize element_size_;
};
|
||||
template <typename TensorAdaptor0, typename TensorAdaptor1>
|
||||
__host__ __device__ constexpr auto chain_tensor_adaptors(const TensorAdaptor0& adaptor0,
|
||||
const TensorAdaptor1& adaptor1)
|
||||
{
|
||||
static_assert(TensorAdaptor0::GetNumOfTopDimension() ==
|
||||
TensorAdaptor1::GetNumOfBottomDimension(),
|
||||
"wrong!");
|
||||
|
||||
// all_transforms = transform0 + transform1
|
||||
const auto all_transforms =
|
||||
container_concat(adaptor0.GetTransforms(), adaptor1.GetTransforms());
|
||||
|
||||
// shift
|
||||
constexpr index_t adaptor0_max_hidden_id = [&]() {
|
||||
index_t adaptor0_max_hidden_id_ = NumericLimits<index_t>::Min();
|
||||
|
||||
static_for<0, TensorAdaptor0::GetNumOfTransform(), 1>{}([&](auto itran) {
|
||||
constexpr index_t ndim_low =
|
||||
TensorAdaptor0{}.GetTransforms()[itran].GetNumOfLowerDimension();
|
||||
|
||||
static_for<0, ndim_low, 1>{}([&](auto idim_low) {
|
||||
adaptor0_max_hidden_id_ =
|
||||
math::max(adaptor0_max_hidden_id_,
|
||||
TensorAdaptor0::GetLowerDimensionHiddenIdss()[itran][idim_low].value);
|
||||
});
|
||||
|
||||
constexpr index_t ndim_up =
|
||||
TensorAdaptor0{}.GetTransforms()[itran].GetNumOfUpperDimension();
|
||||
|
||||
static_for<0, ndim_up, 1>{}([&](auto idim_up) {
|
||||
adaptor0_max_hidden_id_ =
|
||||
math::max(adaptor0_max_hidden_id_,
|
||||
TensorAdaptor0::GetUpperDimensionHiddenIdss()[itran][idim_up].value);
|
||||
});
|
||||
});
|
||||
|
||||
return adaptor0_max_hidden_id_;
|
||||
}();
|
||||
|
||||
constexpr index_t adaptor1_min_hidden_id = [&]() {
|
||||
index_t adaptor1_min_hidden_id_ = NumericLimits<index_t>::Max();
|
||||
|
||||
static_for<0, TensorAdaptor1::GetNumOfTransform(), 1>{}([&](auto itran) {
|
||||
constexpr index_t ndim_low =
|
||||
TensorAdaptor1{}.GetTransforms()[itran].GetNumOfLowerDimension();
|
||||
|
||||
// get the min of all lower dimenions, but not bottom dimension (because their id will
|
||||
// be matched with top id from adaptor0)
|
||||
static_for<0, ndim_low, 1>{}([&](auto idim_low) {
|
||||
constexpr index_t low_dim_hidden_id =
|
||||
TensorAdaptor1::GetLowerDimensionHiddenIdss()[itran][idim_low].value;
|
||||
|
||||
bool is_bottom_dim = false;
|
||||
static_for<0, TensorAdaptor1::GetNumOfBottomDimension(), 1>{}([&](auto i) {
|
||||
if constexpr(low_dim_hidden_id ==
|
||||
TensorAdaptor1::GetBottomDimensionHiddenIds()[i])
|
||||
{
|
||||
is_bottom_dim = true;
|
||||
}
|
||||
});
|
||||
|
||||
if(!is_bottom_dim)
|
||||
{
|
||||
adaptor1_min_hidden_id_ = math::min(adaptor1_min_hidden_id_, low_dim_hidden_id);
|
||||
}
|
||||
});
|
||||
|
||||
constexpr index_t ndim_up =
|
||||
TensorAdaptor1{}.GetTransforms()[itran].GetNumOfUpperDimension();
|
||||
|
||||
// get the min of all upper dimensions
|
||||
static_for<0, ndim_up, 1>{}([&](auto idim_up) {
|
||||
adaptor1_min_hidden_id_ =
|
||||
math::min(adaptor1_min_hidden_id_,
|
||||
TensorAdaptor1::GetUpperDimensionHiddenIdss()[itran][idim_up].value);
|
||||
});
|
||||
});
|
||||
|
||||
return adaptor1_min_hidden_id_;
|
||||
}();
|
||||
|
||||
constexpr index_t adaptor1_hidden_id_shift =
|
||||
adaptor0_max_hidden_id + 1 - adaptor1_min_hidden_id;
|
||||
|
||||
constexpr index_t ndim_bottom_1 = TensorAdaptor1::GetNumOfBottomDimension();
|
||||
|
||||
// all_low_dim_hidden_idss =
|
||||
// low_dim_hidden_idss_0 + match_hidden_id_for_1(shift_hidden_id_for_1(low_dim_hiden_idss_1))
|
||||
constexpr auto low_dim_hidden_idss_1 = generate_tuple(
|
||||
// generate sequence of ids for a transform
|
||||
[&](auto itran) {
|
||||
constexpr auto ndim_low_1 = TensorAdaptor1::GetLowerDimensionHiddenIdss()[itran].Size();
|
||||
|
||||
constexpr auto low_dim_hidden_ids_1 =
|
||||
TensorAdaptor1::GetLowerDimensionHiddenIdss()[itran];
|
||||
|
||||
// sequence in, sequence out
|
||||
constexpr auto low_dim_hidden_ids_1_mod = [&]() constexpr
|
||||
{
|
||||
auto low_dim_hidden_ids_1_mod_ = to_multi_index(low_dim_hidden_ids_1);
|
||||
|
||||
// shift hidden id so every dim id is unique
|
||||
static_for<0, ndim_low_1, 1>{}([&](auto idim_low_1) {
|
||||
low_dim_hidden_ids_1_mod_(idim_low_1) += adaptor1_hidden_id_shift;
|
||||
});
|
||||
|
||||
// match hidden id
|
||||
static_for<0, ndim_low_1, 1>{}([&](auto idim_low_1) {
|
||||
static_for<0, ndim_bottom_1, 1>{}([&](auto idim_bottom_1) {
|
||||
// if this low dim is bottom dim, then do id matching
|
||||
if constexpr(low_dim_hidden_ids_1[idim_low_1] ==
|
||||
TensorAdaptor1::GetBottomDimensionHiddenIds()[idim_bottom_1])
|
||||
{
|
||||
low_dim_hidden_ids_1_mod_(idim_low_1) =
|
||||
TensorAdaptor0::GetTopDimensionHiddenIds()[idim_bottom_1];
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
return low_dim_hidden_ids_1_mod_;
|
||||
}
|
||||
();
|
||||
|
||||
return generate_sequence_v2(
|
||||
[&](auto i) constexpr { return Number<low_dim_hidden_ids_1_mod[i]>{}; },
|
||||
Number<ndim_low_1>{});
|
||||
},
|
||||
Number<TensorAdaptor1::GetNumOfTransform()>{});
|
||||
|
||||
constexpr auto all_low_dim_hidden_idss =
|
||||
container_concat(TensorAdaptor0::GetLowerDimensionHiddenIdss(), low_dim_hidden_idss_1);
|
||||
|
||||
// all_up_dim_hidden_idss =
|
||||
// up_dim_hidden_idss_0 + shift_hidden_id_for_1(up_dim_hiden_idss_1)
|
||||
constexpr auto up_dim_hidden_idss_1 = generate_tuple(
|
||||
// generate sequence of ids for a transform
|
||||
[&](auto itran) {
|
||||
constexpr auto ndim_up_1 = TensorAdaptor1::GetUpperDimensionHiddenIdss()[itran].Size();
|
||||
|
||||
constexpr auto up_dim_hidden_ids_1 =
|
||||
TensorAdaptor1::GetUpperDimensionHiddenIdss()[itran];
|
||||
|
||||
// sequence in, constexpr tuple out
|
||||
constexpr auto up_dim_hidden_ids_1_mod = [&]() constexpr
|
||||
{
|
||||
auto up_dim_hidden_ids_1_mod_ = to_multi_index(up_dim_hidden_ids_1);
|
||||
|
||||
// shift hidden id
|
||||
static_for<0, ndim_up_1, 1>{}([&](auto idim_up_1) {
|
||||
up_dim_hidden_ids_1_mod_(idim_up_1) += adaptor1_hidden_id_shift;
|
||||
});
|
||||
|
||||
return up_dim_hidden_ids_1_mod_;
|
||||
}
|
||||
();
|
||||
|
||||
// constexpr tuple to sequence
|
||||
return generate_sequence_v2(
|
||||
[&](auto i) constexpr { return Number<up_dim_hidden_ids_1_mod[i]>{}; },
|
||||
Number<ndim_up_1>{});
|
||||
},
|
||||
Number<TensorAdaptor1::GetNumOfTransform()>{});
|
||||
|
||||
constexpr auto all_up_dim_hidden_idss =
|
||||
container_concat(TensorAdaptor0::GetUpperDimensionHiddenIdss(), up_dim_hidden_idss_1);
|
||||
|
||||
// bottom_dim_hidden_ids = bottom_dim_hidden_ids_0
|
||||
constexpr auto bottom_dim_hidden_ids = TensorAdaptor0::GetBottomDimensionHiddenIds();
|
||||
|
||||
// top_dim_hidden_ids = shift_hidden_id(top_dim_hidden_ids_1)
|
||||
constexpr auto top_dim_hidden_ids =
|
||||
TensorAdaptor1::GetTopDimensionHiddenIds() + Number<adaptor1_hidden_id_shift>{};
|
||||
|
||||
// put everything together
|
||||
return TensorAdaptor<remove_cv_t<decltype(all_transforms)>,
|
||||
remove_cv_t<decltype(all_low_dim_hidden_idss)>,
|
||||
remove_cv_t<decltype(all_up_dim_hidden_idss)>,
|
||||
remove_cv_t<decltype(bottom_dim_hidden_ids)>,
|
||||
remove_cv_t<decltype(top_dim_hidden_ids)>>{all_transforms};
|
||||
}
|
||||
|
||||
// Transforms: Tuple<transforms...>
|
||||
// LowerDimensionOldTopIdss: Tuple<Sequence<...>, ...>
|
||||
// UpperDimensionNewTopIdss: Tuple<Sequence<...>, ...>
|
||||
template <typename Transforms, typename LowerDimensionOldTopIdss, typename UpperDimensionNewTopIdss>
|
||||
__host__ __device__ constexpr auto make_single_stage_tensor_adaptor(const Transforms& transforms,
|
||||
LowerDimensionOldTopIdss,
|
||||
UpperDimensionNewTopIdss)
|
||||
{
|
||||
constexpr index_t ntransform = Transforms::Size();
|
||||
|
||||
static_assert(LowerDimensionOldTopIdss::Size() == ntransform &&
|
||||
UpperDimensionNewTopIdss::Size() == ntransform,
|
||||
"wrong!");
|
||||
|
||||
// sanity check on LowerDimensionOldTopIdss and UpperDimensionNewTopIdss
|
||||
constexpr auto all_low_dim_old_top_ids = unpack(
|
||||
[](auto&&... xs) constexpr { return merge_sequences(xs...); }, LowerDimensionOldTopIdss{});
|
||||
|
||||
constexpr auto all_up_dim_new_top_ids = unpack(
|
||||
[](auto&&... xs) constexpr { return merge_sequences(xs...); }, UpperDimensionNewTopIdss{});
|
||||
|
||||
static_assert(is_valid_sequence_map<decltype(all_low_dim_old_top_ids)>::value &&
|
||||
is_valid_sequence_map<decltype(all_up_dim_new_top_ids)>::value,
|
||||
"wrong!");
|
||||
|
||||
constexpr index_t ndim_old_top = all_low_dim_old_top_ids.Size();
|
||||
constexpr index_t ndim_new_top = all_up_dim_new_top_ids.Size();
|
||||
|
||||
// low_dim_hidden_idss
|
||||
constexpr auto low_dim_hidden_idss = LowerDimensionOldTopIdss{};
|
||||
|
||||
// up_dim_hidden_idss: shift UpperDimensionNewTopIdss by ndim_bottom
|
||||
constexpr auto up_dim_hidden_idss = generate_tuple(
|
||||
[](auto itran) { return UpperDimensionNewTopIdss{}[itran] + Number<ndim_old_top>{}; },
|
||||
Number<ntransform>{});
|
||||
|
||||
// bottom_dim_hidden_ids
|
||||
constexpr auto bottom_dim_hidden_ids =
|
||||
typename arithmetic_sequence_gen<0, ndim_old_top, 1>::type{};
|
||||
|
||||
// top_dim_hidden_ids
|
||||
constexpr auto top_dim_hidden_ids =
|
||||
typename arithmetic_sequence_gen<0, ndim_new_top, 1>::type{} + Number<ndim_old_top>{};
|
||||
|
||||
return TensorAdaptor<remove_cv_t<Transforms>,
|
||||
remove_cv_t<decltype(low_dim_hidden_idss)>,
|
||||
remove_cv_t<decltype(up_dim_hidden_idss)>,
|
||||
remove_cv_t<decltype(bottom_dim_hidden_ids)>,
|
||||
remove_cv_t<decltype(top_dim_hidden_ids)>>{transforms};
|
||||
}
|
||||
|
||||
template <typename X, typename... Xs, typename enable_if<sizeof...(Xs) >= 2, bool>::type = false>
|
||||
__host__ __device__ constexpr auto chain_tensor_adaptors(const X& x, const Xs&... xs)
|
||||
{
|
||||
return chain_tensor_adaptors(x, chain_tensor_adaptors(xs...));
|
||||
}
|
||||
|
||||
} // namespace ck
|
||||
#endif
|
||||
600
include/ck/tensor_description/tensor_descriptor.hpp
Normal file
600
include/ck/tensor_description/tensor_descriptor.hpp
Normal file
@@ -0,0 +1,600 @@
|
||||
#ifndef CK_TENSOR_DESCRIPTOR_HPP
#define CK_TENSOR_DESCRIPTOR_HPP

#include "common_header.hpp"
#include "multi_index_transform.hpp"

namespace ck {

template <index_t NDimHidden, typename VisibleDimensionIds>
struct TensorCoordinate;

template <index_t NTransform, index_t NDimVisible, typename UpdateLowerIndexHack>
struct TensorCoordinateStep;

// Describes a tensor as a chain of multi-index transforms plus an
// element-space size.
//
// Transforms:          Tuple<transforms...>
// LowerDimensionIdss:  Tuple<Sequence<...>, ...>
// UpperDimensionIdss:  Tuple<Sequence<...>, ...>
// VisibleDimensionIds: Sequence<...>
template <typename Transforms,
          typename LowerDimensionIdss,
          typename UpperDimensionIdss,
          typename VisibleDimensionIds,
          typename ElementSpaceSize>
struct TensorDescriptor
{
    // TODO make these private
    __host__ __device__ static constexpr index_t GetNumOfTransform() { return Transforms::Size(); }

    __host__ __device__ static constexpr index_t GetNumOfVisibleDimension()
    {
        return VisibleDimensionIds::Size();
    }

    // Count of distinct dimension ids referenced by any transform
    // (union of all lower and upper id sequences, deduplicated).
    __host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
    {
        constexpr auto all_low_dim_ids = unpack(
            [](auto&&... xs) constexpr { return merge_sequences(xs...); }, LowerDimensionIdss{});

        constexpr auto all_up_dim_ids = unpack(
            [](auto&&... xs) constexpr { return merge_sequences(xs...); }, UpperDimensionIdss{});

        constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);

        using unique_sort_all_dim_ids = typename sequence_unique_sort<decltype(all_dim_ids),
                                                                      math::less<index_t>,
                                                                      math::equal<index_t>>::type;

        return unique_sort_all_dim_ids::Size();
    }

    // Product of all visible-dimension lengths; each length is looked up on the
    // transform that owns the corresponding dimension id.
    __host__ __device__ static constexpr auto InitializeElementSize(const Transforms& transforms)
    {
        const auto lengths = generate_tuple(
            [&](auto idim_visible) {
                constexpr auto tmp = GetTransformAndItsUpperDimension(idim_visible);

                constexpr index_t itran   = tmp[Number<0>{}];
                constexpr index_t idim_up = tmp[Number<1>{}];
                constexpr bool found      = tmp[Number<2>{}];

                static_assert(found == true,
                              "wrong! not found matching transformation and upper-dimension");

                const auto length =
                    transforms[Number<itran>{}].GetUpperLengths()[Number<idim_up>{}];

                return length;
            },
            Number<ndim_visible_>{});

        // TODO: make container_reduce support tuple of Number and index_t
        return container_reduce(lengths, math::multiplies{}, Number<1>{});
    }

    // For visible dimension IDim, find the transform and the upper-dimension
    // slot whose id matches it. Returns (transform index, upper-dim index,
    // found flag).
    template <index_t IDim>
    __host__ __device__ static constexpr auto GetTransformAndItsUpperDimension(Number<IDim>)
    {
        constexpr auto idim_visible = Number<IDim>{};

        constexpr index_t idim_hidden = VisibleDimensionIds::At(idim_visible);

        index_t itran_found   = 0;
        index_t idim_up_found = 0;
        bool found            = false;

        static_for<0, ntransform_, 1>{}([&](auto itran) {
            constexpr auto up_dim_ids = UpperDimensionIdss{}[itran];

            static_for<0, up_dim_ids.Size(), 1>{}([&](auto idim_up) {
                if constexpr(up_dim_ids[idim_up] == idim_hidden)
                {
                    itran_found   = itran;
                    idim_up_found = idim_up;
                    found         = true;
                }
            });
        });

        return make_tuple(itran_found, idim_up_found, found);
    }

    constexpr static index_t ntransform_   = GetNumOfTransform();
    constexpr static index_t ndim_visible_ = GetNumOfVisibleDimension();
    constexpr static index_t ndim_hidden_  = GetNumOfHiddenDimension();

    using VisibleIndex = MultiIndex<ndim_visible_>;
    using HiddenIndex  = MultiIndex<ndim_hidden_>;
    using Coordinate   = TensorCoordinate<ndim_hidden_, VisibleDimensionIds>;

    // may be index_t or Number<>
    using ElementSize = remove_cv_t<decltype(InitializeElementSize(Transforms{}))>;

    public:
    __host__ __device__ constexpr TensorDescriptor() = default;

    __host__ __device__ constexpr TensorDescriptor(const Transforms& transforms,
                                                   ElementSpaceSize element_space_size)
        : transforms_{transforms},
          element_size_{InitializeElementSize(transforms)},
          element_space_size_{element_space_size}

    {
        static_assert(Transforms::Size() == ntransform_ &&
                          LowerDimensionIdss::Size() == ntransform_ &&
                          UpperDimensionIdss::Size() == ntransform_,
                      "wrong! inconsistent # of transformations");

        // TODO check dependency of dimensions is valid
    }

    __host__ __device__ static constexpr index_t GetNumOfDimension()
    {
        return GetNumOfVisibleDimension();
    }

    template <index_t IDim>
    __host__ __device__ constexpr auto GetLength(Number<IDim>) const
    {
        static_assert(IDim >= 0 && IDim < ndim_visible_, "wrong! out of range");

        constexpr auto tmp = GetTransformAndItsUpperDimension(Number<IDim>{});

        constexpr index_t itran   = tmp[Number<0>{}];
        constexpr index_t idim_up = tmp[Number<1>{}];
        constexpr bool found      = tmp[Number<2>{}];

        static_assert(found == true,
                      "wrong! not found matching transformation and upper-dimension");

        return transforms_[Number<itran>{}].GetUpperLengths()[Number<idim_up>{}];
    }

    __host__ __device__ constexpr auto GetElementSize() const { return element_size_; }

    __host__ __device__ constexpr auto GetElementSpaceSize() const { return element_space_size_; }

    // Linear offset for a visible multi-index, computed by materializing a
    // tensor coordinate.
    template <typename Idx>
    __host__ __device__ constexpr index_t CalculateOffset(const Idx& idx) const
    {
        static_assert(Idx::Size() == GetNumOfDimension(), "wrong! inconsistent # of dimension");

        return make_tensor_coordinate(*this, idx).GetOffset();
    }

    // TODO make these private
    __host__ __device__ constexpr const auto& GetTransforms() const { return transforms_; }

    __host__ __device__ static constexpr auto GetLowerDimensionIdss()
    {
        return LowerDimensionIdss{};
    }

    __host__ __device__ static constexpr auto GetUpperDimensionIdss()
    {
        return UpperDimensionIdss{};
    }

    __host__ __device__ static constexpr auto GetVisibleDimensionIds()
    {
        return VisibleDimensionIds{};
    }

    // True iff every transform, the element size, and the element-space size
    // are compile-time known.
    __host__ __device__ static constexpr bool IsKnownAtCompileTime()
    {
        bool is_known = true;

        static_for<0, Transforms::Size(), 1>{}([&](auto i) {
            is_known &= remove_cvref_t<decltype(Transforms{}[i])>::IsKnownAtCompileTime();
        });

        return is_known && is_known_at_compile_time<ElementSize>::value &&
               is_known_at_compile_time<ElementSpaceSize>::value;
    }

    // Debug dump (host-side printf formatting; device printf on GPU).
    __host__ __device__ void Print() const
    {
        printf("{");
        printf("TensorDescriptor, ");
        static_for<0, ntransform_, 1>{}([&](auto i) {
            printf("transforms: ");
            transforms_[i].Print();
            printf("LowerDimensionIds:");
            LowerDimensionIdss{}.At(i).Print();
            printf("UpperDimensionIds:");
            UpperDimensionIdss{}.At(i).Print();
        });
        printf("}");

        VisibleDimensionIds::Print();
    }

    // TODO make these private
    Transforms transforms_;
    ElementSize element_size_;
    ElementSpaceSize element_space_size_;
};
|
||||
// Coordinate into a TensorDescriptor. Stores the full "hidden" multi-index: by
// convention hidden dimension 0 holds the linear offset, the visible (user-facing)
// dimensions live at VisibleDimensionIds, and the remaining entries are the
// intermediate indices of the descriptor's transform chain.
template <index_t NDimHidden, typename VisibleDimensionIds>
struct TensorCoordinate
{
    // TODO make these private
    static constexpr index_t ndim_visible_ = VisibleDimensionIds::Size();

    using HiddenIndex = MultiIndex<NDimHidden>;
    using VisibleIndex = MultiIndex<ndim_visible_>;

    public:
    __host__ __device__ constexpr TensorCoordinate() = default;

    // idx_hidden must already be fully computed (e.g. by make_tensor_coordinate())
    __host__ __device__ constexpr TensorCoordinate(const HiddenIndex& idx_hidden)
        : idx_hidden_{idx_hidden}
    {
    }

    // visible part of the index (what the caller passed in)
    __host__ __device__ constexpr auto GetIndex() const { return GetVisibleIndex(); }

    // linear offset: hidden dimension 0 holds the bottom-most (memory) index
    __host__ __device__ constexpr index_t GetOffset() const { return idx_hidden_[Number<0>{}]; }

    // TODO make these private
    __host__ __device__ constexpr const auto& GetHiddenIndex() const { return idx_hidden_; }

    // mutable access used by move_tensor_coordinate() to update the coordinate in place
    __host__ __device__ auto& GetHiddenIndex() { return idx_hidden_; }

    __host__ __device__ constexpr auto GetVisibleIndex() const
    {
        return get_container_subset(idx_hidden_, VisibleDimensionIds{});
    }

    // TODO make these private
    HiddenIndex idx_hidden_;
};
|
||||
|
||||
// A precomputed step (index difference) for move_tensor_coordinate().
// do_transforms_ records, per transform, whether that transform must be re-run when
// the step is applied; make_tensor_coordinate_step() sets it from the non-zero
// components of the visible index diff.
template <index_t NTransform, index_t NDimVisible, typename UpdateLowerIndexHack>
struct TensorCoordinateStep
{
    // TODO make these private
    using VisibleIndex = MultiIndex<NDimVisible>;

    public:
    __host__ __device__ constexpr TensorCoordinateStep() = default;

    __host__ __device__ constexpr TensorCoordinateStep(const VisibleIndex& idx_diff_visible,
                                                       const MultiIndex<NTransform>& do_transforms)
        : idx_diff_visible_{idx_diff_visible}, do_transforms_{do_transforms}
    {
    }

    __host__ __device__ constexpr const auto& GetIndexDiff() const { return GetVisibleIndexDiff(); }

    // TODO make these private
    __host__ __device__ constexpr const auto& GetVisibleIndexDiff() const
    {
        return idx_diff_visible_;
    }

    VisibleIndex idx_diff_visible_;
    // per-transform flags (index_t used as boolean) consumed by move_tensor_coordinate()
    MultiIndex<NTransform> do_transforms_;

    // HACK: control UpdateLowerIndex()
    // NOTE(review): only ever inspected via decltype(...)::At(itran), never odr-used,
    // so the missing out-of-line definition is presumably intentional — confirm.
    static constexpr UpdateLowerIndexHack update_lower_index_hack_;
};
|
||||
|
||||
// TODO: How to fix this? It uses an struct instead of lambda because lambda
|
||||
// doesn't have constructor, and to put it outside the scope where it is used
|
||||
// (transform_tensor_descriptor) because template cannot be defined inside a function
|
||||
// template
|
||||
template <typename NewTransforms>
struct lambda_get_up_dim_num
{
    // Returns (as a Number<>) how many upper dimensions the I-th new transform has.
    template <typename I>
    __host__ __device__ constexpr auto operator()(I) const
    {
        using Tran = remove_reference_t<decltype(NewTransforms{}.At(I{}))>;
        return Number<Tran::GetNumOfUpperDimension()>{};
    }
};
|
||||
|
||||
// Builds a new TensorDescriptor by stacking new_transforms on top of
// old_tensor_desc's transform chain. The old descriptor's visible dimensions become
// the lower dimensions of the new transforms; fresh hidden ids are allocated for the
// new upper dimensions, which become the new descriptor's visible dimensions
// (reordered per NewUpperDimensionNewVisibleIdss). The element space size is
// inherited unchanged from the old descriptor.
template <typename OldTensorDescriptor,
          typename NewTransforms,
          typename NewLowerDimensionOldVisibleIdss,
          typename NewUpperDimensionNewVisibleIdss>
__host__ __device__ constexpr auto
transform_tensor_descriptor(const OldTensorDescriptor& old_tensor_desc,
                            const NewTransforms& new_transforms,
                            NewLowerDimensionOldVisibleIdss,
                            NewUpperDimensionNewVisibleIdss)
{
    // sanity check: one lower-id sequence and one upper-id sequence per transform,
    // and the concatenated id sequences must each be a valid permutation
    {
        static_assert(NewTransforms::Size() == NewLowerDimensionOldVisibleIdss::Size() &&
                          NewTransforms::Size() == NewUpperDimensionNewVisibleIdss::Size(),
                      "wrong! inconsitent number of transform");

        constexpr auto all_old_top_ids = unpack([](auto... xs) { return merge_sequences(xs...); },
                                                NewLowerDimensionOldVisibleIdss{});

        constexpr auto all_new_top_ids = unpack([](auto... xs) { return merge_sequences(xs...); },
                                                NewUpperDimensionNewVisibleIdss{});

        static_assert(is_valid_sequence_map<decltype(all_old_top_ids)>::value &&
                          is_valid_sequence_map<decltype(all_new_top_ids)>::value,
                      "wrong!");
    }

    // lower dimension's hidden idss
    // convert lower dimension visible idss (tuple of sequences) to hidden idss (tuple of
    // sequences)
    constexpr auto low_dim_hidden_idss = transform_tuples(
        // convert lower dimension visible ids (a sequence) to hidden ids (a sequence)
        [](auto low_dim_visible_ids) constexpr {
            return transform_sequences(
                // convert lower dimension visible id to hidden id
                [](auto low_dim_visible_id) constexpr {
                    return OldTensorDescriptor::GetVisibleDimensionIds()[low_dim_visible_id];
                },
                low_dim_visible_ids);
        },
        NewLowerDimensionOldVisibleIdss{});

    constexpr index_t num_new_transform = NewTransforms::Size();

    // upper dimension's hidden idss: allocate fresh hidden ids right after the old
    // descriptor's hidden ids, one contiguous range per new transform
    constexpr index_t old_hidden_dim_number = OldTensorDescriptor::GetNumOfHiddenDimension();

    constexpr auto up_dim_numbers =
        generate_sequence(lambda_get_up_dim_num<NewTransforms>{}, Number<num_new_transform>{});

    // exclusive prefix sums of up_dim_numbers, so up_dim_numbers_scan[i]..[i+1] bounds
    // the hidden-id range of transform i
    constexpr auto up_dim_numbers_scan = merge_sequences(
        Sequence<0>{}, inclusive_scan_sequence(up_dim_numbers, math::plus<index_t>{}, Number<0>{}));

    constexpr auto up_dim_hidden_idss = generate_tuple(
        [ old_hidden_dim_number, up_dim_numbers_scan ](auto i) constexpr {
            return
                typename arithmetic_sequence_gen<old_hidden_dim_number + up_dim_numbers_scan[i],
                                                 old_hidden_dim_number + up_dim_numbers_scan[i + 1],
                                                 1>::type{};
        },
        Number<num_new_transform>{});

    // new visible dimension's hidden ids
    constexpr auto unordered_new_visible_dim_hidden_ids = unpack(
        [](auto... xs) constexpr { return merge_sequences(xs...); }, up_dim_hidden_idss);

    constexpr auto new_visible_dim_unordered2ordered = unpack(
        [](auto... xs) constexpr { return merge_sequences(xs...); },
        NewUpperDimensionNewVisibleIdss{});

    constexpr auto new_visible_dim_hidden_ids =
        unordered_new_visible_dim_hidden_ids.ReorderGivenOld2New(new_visible_dim_unordered2ordered);

    // put everything together: old chain first, then the new transforms on top
    const auto all_transforms = container_concat(old_tensor_desc.GetTransforms(), new_transforms);

    constexpr auto all_low_dim_hidden_idss =
        container_concat(OldTensorDescriptor::GetLowerDimensionIdss(), low_dim_hidden_idss);

    constexpr auto all_up_dim_hidden_idss =
        container_concat(OldTensorDescriptor::GetUpperDimensionIdss(), up_dim_hidden_idss);

    const auto element_space_size = old_tensor_desc.GetElementSpaceSize();

    return TensorDescriptor<remove_cv_t<decltype(all_transforms)>,
                            remove_cv_t<decltype(all_low_dim_hidden_idss)>,
                            remove_cv_t<decltype(all_up_dim_hidden_idss)>,
                            remove_cv_t<decltype(new_visible_dim_hidden_ids)>,
                            remove_cv_t<decltype(element_space_size)>>{all_transforms,
                                                                       element_space_size};
}
|
||||
|
||||
// Constructs a TensorCoordinate for idx_visible: seeds the hidden index with the
// visible components, then runs every transform from top (last) to bottom (first),
// each one computing its lower indices from its upper indices, until hidden
// dimension 0 (the linear offset) is filled in.
template <typename TensorDesc, typename VisibleIndex>
__host__ __device__ constexpr auto make_tensor_coordinate(const TensorDesc& tensor_desc,
                                                          const VisibleIndex& idx_visible)
{
    static_assert(TensorDesc::GetNumOfDimension() == VisibleIndex::Size(),
                  "wrong! # of dimension inconsistent");

    constexpr index_t ntransform = TensorDesc::GetNumOfTransform();
    constexpr index_t ndim_hidden = TensorDesc::GetNumOfHiddenDimension();
    constexpr auto visible_dim_ids = TensorDesc::GetVisibleDimensionIds();

    MultiIndex<ndim_hidden> idx_hidden;

    // initialize visible index
    set_container_subset(idx_hidden, visible_dim_ids, idx_visible);

    // calculate hidden index, iterating transforms in reverse (top-down)
    static_for<ntransform, 0, -1>{}([&tensor_desc, &idx_hidden](auto itran_p1) {
        auto itran = itran_p1 - Number<1>{};
        const auto& tran = tensor_desc.GetTransforms().At(itran);
        constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
        constexpr auto dims_up = TensorDesc::GetUpperDimensionIdss().At(itran);

        const auto idx_up = get_container_subset(idx_hidden, dims_up);

        MultiIndex<dims_low.Size()> idx_low;

        tran.CalculateLowerIndex(idx_low, idx_up);

        set_container_subset(idx_hidden, dims_low, idx_low);
    });

    return TensorCoordinate<ndim_hidden, decltype(visible_dim_ids)>{idx_hidden};
}
|
||||
|
||||
// UpdateLowerIndexHack: Sequence<...>
|
||||
// HACK: control UpdateLowerIndex
|
||||
// Builds a TensorCoordinateStep for idx_diff_visible. Propagates the "is this diff
// component non-zero?" flags top-down through the transform chain to decide which
// transforms move_tensor_coordinate() will actually have to re-run.
// UpdateLowerIndexHack is a Sequence<...> (one entry per transform) forwarded into
// the step type to control UpdateLowerIndex() (see move_tensor_coordinate()).
template <typename TensorDesc, typename VisibleIndex, typename UpdateLowerIndexHack>
__host__ __device__ constexpr auto make_tensor_coordinate_step(const TensorDesc&,
                                                               const VisibleIndex& idx_diff_visible,
                                                               UpdateLowerIndexHack)
{
    static_assert(TensorDesc::GetNumOfDimension() == VisibleIndex::Size(),
                  "wrong! # of dimension inconsistent");

    constexpr index_t ntransform = TensorDesc::GetNumOfTransform();
    constexpr index_t ndim_hidden = TensorDesc::GetNumOfHiddenDimension();
    constexpr index_t ndim_visible = TensorDesc::GetNumOfVisibleDimension();
    constexpr auto visible_dim_ids = TensorDesc::GetVisibleDimensionIds();

    static_assert(UpdateLowerIndexHack::Size() == ntransform, "wrong!");

    // use index_t for boolean type
    auto do_transforms = make_zero_multi_index<ntransform>();
    auto is_non_zero_diff = make_zero_multi_index<ndim_hidden>();

    // decide do_transform by checking non-zero index diff components
    MultiIndex<VisibleIndex::Size()> non_zero_diff_pick_visible;

    static_for<0, ndim_visible, 1>{}(
        [&](auto i) { non_zero_diff_pick_visible(i) = (idx_diff_visible[i] != 0); });

    set_container_subset(is_non_zero_diff, visible_dim_ids, non_zero_diff_pick_visible);

    // walk transforms top-down, marking each one and its lower dims when any of its
    // upper diff components is non-zero
    static_for<ntransform - 1, -1, -1>{}([&](auto itran) {
        constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
        constexpr auto dims_up = TensorDesc::GetUpperDimensionIdss().At(itran);

        const auto non_zero_diff_pick_up = get_container_subset(is_non_zero_diff, dims_up);

        MultiIndex<dims_low.Size()> non_zero_diff_pick_low;

        // if any of upper index diff components is non-zero, then
        // 1) Need to do this transform
        // 2) all components of lower index diff will assume to be non-zero and need to be
        // computed
        const bool idx_diff_up_has_non_zero = container_reduce(
            non_zero_diff_pick_up, [](auto a, auto b) constexpr { return a or b; }, false);

        do_transforms(itran) = idx_diff_up_has_non_zero;

        static_for<0, dims_low.Size(), 1>{}(
            [&](auto i) { non_zero_diff_pick_low(i) = idx_diff_up_has_non_zero; });

        set_container_subset(is_non_zero_diff, dims_low, non_zero_diff_pick_low);
    });

    return TensorCoordinateStep<ntransform, ndim_visible, UpdateLowerIndexHack>{idx_diff_visible,
                                                                                do_transforms};
}
|
||||
|
||||
template <typename TensorDesc, typename VisibleIndex>
|
||||
__host__ __device__ constexpr auto make_tensor_coordinate_step(const TensorDesc&,
|
||||
const VisibleIndex& idx_diff_visible)
|
||||
{
|
||||
constexpr index_t ntransform = TensorDesc::GetNumOfTransform();
|
||||
|
||||
return make_tensor_coordinate_step(
|
||||
TensorDesc{}, idx_diff_visible, typename uniform_sequence_gen<ntransform, 0>::type{});
|
||||
}
|
||||
|
||||
// Advances coord in place by the precomputed coord_step. Only the transforms
// flagged in coord_step.do_transforms_ are re-run (the others are unaffected
// because all of their upper index diffs are zero), which is the whole point of
// precomputing the step.
template <typename TensorDesc, typename TensorCoord, typename TensorCoordStep>
__host__ __device__ constexpr void move_tensor_coordinate(const TensorDesc& tensor_desc,
                                                          TensorCoord& coord,
                                                          const TensorCoordStep& coord_step)
{
    constexpr index_t ndim_hidden = TensorDesc::GetNumOfHiddenDimension();
    constexpr index_t ntransform = TensorDesc::GetNumOfTransform();

    // this is what needs to be calculated
    auto idx_diff_hidden = make_zero_multi_index<ndim_hidden>();

    // initialize visible index diff
    set_container_subset(
        idx_diff_hidden, TensorDesc::GetVisibleDimensionIds(), coord_step.GetVisibleIndexDiff());

    // this is what needs to be updated
    auto& idx_hidden = coord.GetHiddenIndex();

    // update visible index
    auto idx_hidden_pick_visible =
        get_container_subset(idx_hidden, TensorDesc::GetVisibleDimensionIds());

    idx_hidden_pick_visible += coord_step.GetIndexDiff();

    set_container_subset(idx_hidden, TensorDesc::GetVisibleDimensionIds(), idx_hidden_pick_visible);

    // update rest of hidden index, walking transforms from top (last) to bottom (first)
    static_for<ntransform - 1, -1, -1>{}([&](auto itran) {
        if(coord_step.do_transforms_[itran])
        {
            const auto& tran = tensor_desc.GetTransforms().At(itran);
            constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
            constexpr auto dims_up = TensorDesc::GetUpperDimensionIdss().At(itran);

            const auto idx_up_new = get_container_subset(idx_hidden, dims_up);
            auto idx_low = get_container_subset(idx_hidden, dims_low);
            const auto idx_diff_up = get_container_subset(idx_diff_hidden, dims_up);

            MultiIndex<dims_low.Size()> idx_diff_low;

            // HACK: control UpdateLowerIndex for Merge using hack
            constexpr index_t Hack = decltype(coord_step.update_lower_index_hack_)::At(itran);

            tran.UpdateLowerIndex(idx_diff_low, idx_diff_up, idx_low, idx_up_new, Number<Hack>{});

            set_container_subset(idx_diff_hidden, dims_low, idx_diff_low);
            set_container_subset(idx_hidden, dims_low, idx_low);
        }
    });
}
|
||||
|
||||
// Checks transform-level validity of coord's hidden index, assuming the visible
// index is already known to be in range. Skips (at compile time) every transform
// that guarantees valid-upper-implies-valid-lower.
template <typename TensorDesc, typename TensorCoord>
__host__ __device__ constexpr bool
coordinate_has_valid_offset_assuming_visible_index_is_valid(const TensorDesc& tensor_desc,
                                                            const TensorCoord& coord)
{
    bool valid = true;

    constexpr index_t ntransform = TensorDesc::GetNumOfTransform();

    const auto& idx_hidden = coord.GetHiddenIndex();

    static_for<ntransform - 1, -1, -1>{}([&tensor_desc, &idx_hidden, &valid](auto itran) {
        const auto tran = tensor_desc.GetTransforms().At(itran);

        // check validity, only if current transformation does not always have a valid mapping
        if constexpr(!decltype(tran)::IsValidUpperIndexAlwaysMappedToValidLowerIndex())
        {
            const auto idx_up =
                get_container_subset(idx_hidden, TensorDesc::GetUpperDimensionIdss().At(itran));

            // Comment: using valid = valid && .. will result in weird control flow in ISA
            valid &= tran.IsValidUpperIndexMappedToValidLowerIndex(idx_up);
        }
    });

    return valid;
}
|
||||
|
||||
// Full validity check for coord: first range-checks every visible index component
// against the descriptor's lengths, then defers to the transform-level check on the
// hidden index.
template <typename TensorDesc, typename TensorCoord>
__host__ __device__ constexpr bool coordinate_has_valid_offset(const TensorDesc& tensor_desc,
                                                               const TensorCoord& coord)
{
    // check visible index
    const auto& idx_visible = coord.GetVisibleIndex();

    bool is_visible_index_valid = true;

    static_for<0, TensorDesc::GetNumOfDimension(), 1>{}(
        [&is_visible_index_valid, &idx_visible, &tensor_desc](auto i) {
            is_visible_index_valid =
                is_visible_index_valid &&
                (idx_visible[i] >= 0 && idx_visible[i] < tensor_desc.GetLength(i));
        });

    // check other hidden index
    return is_visible_index_valid &&
           coordinate_has_valid_offset_assuming_visible_index_is_valid(tensor_desc, coord);
}
|
||||
|
||||
// Coordinate type produced by make_tensor_coordinate() for a given
// (default-constructible) tensor descriptor type.
template <typename TensorDesc>
using TensorCoordinate_t = decltype(make_tensor_coordinate(
    TensorDesc{}, MultiIndex<remove_cvref_t<TensorDesc>::GetNumOfDimension()>{}));

// Step type produced by the default (no-hack) make_tensor_coordinate_step() overload.
template <typename TensorDesc>
using TensorCoordinateStep_t = decltype(make_tensor_coordinate_step(
    TensorDesc{}, MultiIndex<remove_cvref_t<TensorDesc>::GetNumOfDimension()>{}));
|
||||
|
||||
} // namespace ck
|
||||
#endif
|
||||
149
include/ck/tensor_description/tensor_descriptor_helper.hpp
Normal file
149
include/ck/tensor_description/tensor_descriptor_helper.hpp
Normal file
@@ -0,0 +1,149 @@
|
||||
#ifndef CK_TENSOR_DESCRIPTOR_HELPER_HPP
|
||||
#define CK_TENSOR_DESCRIPTOR_HELPER_HPP
|
||||
|
||||
#include "common_header.hpp"
|
||||
#include "tensor_descriptor.hpp"
|
||||
#include "multi_index_transform_helper.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
/*
|
||||
 * These functions create tensor descriptors at runtime. If they are not constexpr, you will
|
||||
* likely see usage of scratch memory during construction of these tensor descriptors. So
|
||||
 * it's better to call these functions on host and then pass the constructed tensor descriptors
|
||||
 * to GPU. If the tensor descriptors being constructed are constexpr, then you can call these
|
||||
* functions on GPU without worrying about scratch memory usage.
|
||||
*/
|
||||
|
||||
#if CK_WORKAROUND_SWDEV_275126
|
||||
// Recursive replacement for the fold lambda in make_naive_tensor_descriptor()
// (workaround for the SWDEV-275126 compiler crash): accumulates
// acc_old + sum over i of (lengths[i] - 1) * strides[i].
template <typename Lengths, typename Strides, index_t I, typename AccOld>
__host__ __device__ constexpr auto calculate_element_space_size_impl(const Lengths& lengths,
                                                                     const Strides& strides,
                                                                     Number<I> i,
                                                                     AccOld acc_old)
{
    auto acc_new = acc_old + (lengths[i] - Number<1>{}) * strides[i];

    // recurse until the last dimension has been folded in
    if constexpr(i.value < Lengths::Size() - 1)
    {
        return calculate_element_space_size_impl(lengths, strides, i + Number<1>{}, acc_new);
    }
    else
    {
        return acc_new;
    }
}
|
||||
#endif
|
||||
|
||||
// Creates a naive (single embed transform) tensor descriptor from explicit lengths
// and strides. Element space size is computed as
// 1 + sum_i (lengths[i] - 1) * strides[i], i.e. one past the largest reachable
// offset. Lengths/Strides entries may be runtime index_t or compile-time Number<>.
template <typename... Lengths,
          typename... Strides,
          typename enable_if<sizeof...(Lengths) == sizeof...(Strides), bool>::type = false>
__host__ __device__ constexpr auto make_naive_tensor_descriptor(const Tuple<Lengths...>& lengths,
                                                                const Tuple<Strides...>& strides)
{
    constexpr index_t N = sizeof...(Lengths);

    const auto transforms = make_tuple(make_embed_transform(lengths, strides));

    // hidden dimension 0 is the linear offset; dimensions 1..N are the visible ones
    constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{});

    constexpr auto up_dim_hidden_idss =
        make_tuple(typename arithmetic_sequence_gen<1, N + 1, 1>::type{});

    constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<1, N + 1, 1>::type{};

#if !CK_WORKAROUND_SWDEV_275126
    // rocm-4.1 compiler would crash for recursive lambda
    // recursive function for reduction
    auto f = [&](auto fs, auto i, auto acc_old) {
        auto acc_new = acc_old + (lengths[i] - Number<1>{}) * strides[i];

        if constexpr(i.value < N - 1)
        {
            return fs(fs, i + Number<1>{}, acc_new);
        }
        else
        {
            return acc_new;
        }
    };

    const auto element_space_size = f(f, Number<0>{}, Number<1>{});
#else
    const auto element_space_size =
        calculate_element_space_size_impl(lengths, strides, Number<0>{}, Number<1>{});
#endif

    return TensorDescriptor<remove_cv_t<decltype(transforms)>,
                            remove_cv_t<decltype(low_dim_hidden_idss)>,
                            remove_cv_t<decltype(up_dim_hidden_idss)>,
                            remove_cv_t<decltype(visible_dim_hidden_ids)>,
                            remove_cv_t<decltype(element_space_size)>>{transforms,
                                                                       element_space_size};
}
|
||||
|
||||
// Lengths... can be:
|
||||
// 1) index_t, which is known at run-time
|
||||
// 2) Number<>, which is known at compile-time
|
||||
// Creates a packed (contiguous, row-major-like via a single unmerge transform)
// tensor descriptor from lengths only; element space size is the product of all
// lengths.
template <typename... Lengths>
__host__ __device__ constexpr auto
make_naive_tensor_descriptor_packed(const Tuple<Lengths...>& lengths)
{
    constexpr index_t N = sizeof...(Lengths);

    const auto transforms = make_tuple(make_unmerge_transform(lengths));

    // hidden dimension 0 is the linear offset; dimensions 1..N are the visible ones
    constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{});

    constexpr auto up_dim_hidden_idss =
        make_tuple(typename arithmetic_sequence_gen<1, N + 1, 1>::type{});

    constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<1, N + 1, 1>::type{};

    const auto element_space_size = container_reduce(lengths, math::multiplies{}, Number<1>{});

    return TensorDescriptor<remove_cv_t<decltype(transforms)>,
                            remove_cv_t<decltype(low_dim_hidden_idss)>,
                            remove_cv_t<decltype(up_dim_hidden_idss)>,
                            remove_cv_t<decltype(visible_dim_hidden_ids)>,
                            remove_cv_t<decltype(element_space_size)>>{transforms,
                                                                       element_space_size};
}
|
||||
|
||||
// Creates a naive tensor descriptor where the innermost dimension (N-1) has stride 1
// and dimension N-2 has its stride rounded up to a multiple of `align` (padding the
// innermost row); outer strides are products of the lengths above the padded row.
template <typename... Lengths, typename Align>
__host__ __device__ constexpr auto
make_naive_tensor_descriptor_aligned(const Tuple<Lengths...>& lengths, Align align)
{
    constexpr auto I1 = Number<1>{};

    constexpr index_t N = sizeof...(Lengths);

    // stride of dimension N-2: innermost length rounded up to a multiple of align.
    // NOTE(review): used as a template argument of Number<> below, so this presumably
    // must be a compile-time value (e.g. the inputs are Number<>s) — confirm.
    const auto stride_n_minus_2 = math::integer_least_multiple(lengths[Number<N - 1>{}], align);

    auto strides = generate_tuple(
        [&](auto i) {
            if constexpr(i.value == N - 1)
            {
                return I1;
            }
            else if constexpr(i.value == N - 2)
            {
                return Number<stride_n_minus_2>{};
            }
            else
            {
                // product of lengths[i+1 .. N-2] times the aligned innermost stride
                return container_reduce(lengths,
                                        math::multiplies{},
                                        Number<stride_n_minus_2>{},
                                        i + I1,
                                        Number<N - 1>{},
                                        I1);
            }
        },
        Number<N>{});

    return make_naive_tensor_descriptor(lengths, strides);
}
|
||||
|
||||
} // namespace ck
|
||||
#endif
|
||||
Reference in New Issue
Block a user