mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 09:16:52 +00:00
adding GetLinearDimensionMask()
This commit is contained in:
@@ -440,9 +440,7 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded_lds_double_buf
|
||||
0,
|
||||
b_thread_data_on_global,
|
||||
0})
|
||||
#if 0
|
||||
.Run_generic
|
||||
#elif 1
|
||||
#if 1
|
||||
.template Run_generic<Float, address_space_t::generic, address_space_t::global>
|
||||
#elif 1
|
||||
.template Run_optimized_dst_address_calculation<Float, address_space_t::global>
|
||||
|
||||
@@ -45,6 +45,7 @@ struct NativeTensorCoordinate
|
||||
__host__ __device__ constexpr type operator+=(const Index& idx_diff)
|
||||
{
|
||||
// mIndex is updated here, but some (or all) of its entries may never be used
|
||||
// compiler should remove those entries as dead code
|
||||
mIndex += idx_diff;
|
||||
|
||||
mOffset += tensor_desc_type::CalculateOffsetDiff(idx_diff);
|
||||
@@ -55,6 +56,7 @@ struct NativeTensorCoordinate
|
||||
__host__ __device__ constexpr type operator-=(const Index& idx_diff)
|
||||
{
|
||||
// mIndex is updated here, but some (or all) of its entries may never be used
|
||||
// compiler should remove those entries as dead code
|
||||
mIndex -= idx_diff;
|
||||
|
||||
mOffset -= tensor_desc_type::CalculateOffsetDiff(idx_diff);
|
||||
@@ -136,6 +138,7 @@ struct TransformedTensorCoordinate
|
||||
idx_up_diff, GetIndex(), GetLowerCoordinate().GetIndex());
|
||||
|
||||
// mIndexUp is updated here, but some (or all) of its entries may never be used
|
||||
// compiler should remove those entries as dead code
|
||||
mIndexUp += idx_up_diff;
|
||||
|
||||
return *this;
|
||||
@@ -146,6 +149,8 @@ struct TransformedTensorCoordinate
|
||||
mCoordLow -= tensor_desc_type::CalculateLowerIndexDiff(
|
||||
idx_up_diff, GetIndex(), GetLowerCoordinate().GetIndex());
|
||||
|
||||
// mIndex is updated here, but some (or all) of its entries may never be used
|
||||
// compiler should remove those entries as dead code
|
||||
mIndexUp -= idx_up_diff;
|
||||
|
||||
return *this;
|
||||
|
||||
@@ -101,12 +101,12 @@ struct NativeTensorDescriptor
|
||||
return true;
|
||||
}
|
||||
|
||||
__host__ __device__ static constexpr auto GetMaskOfLinearDimensions()
|
||||
__host__ __device__ static constexpr auto GetLinearDimensionMask()
|
||||
{
|
||||
return typename uniform_sequence_gen<nDim, 1>::type{};
|
||||
}
|
||||
|
||||
__host__ __device__ static constexpr auto GetMaskOfNonLinearDimensions()
|
||||
__host__ __device__ static constexpr auto GetNonLinearDimensionMask()
|
||||
{
|
||||
return typename uniform_sequence_gen<nDim, 0>::type{};
|
||||
}
|
||||
@@ -353,18 +353,27 @@ struct TransformedTensorDescriptor
|
||||
return GetLowerTensorDescriptor().CalculateOffset(CalculateLowerIndex(idx_up));
|
||||
}
|
||||
|
||||
#if 0
|
||||
struct lambda_sequence_logic_or
|
||||
#if 1
|
||||
struct lambda_sequence_logical_and
|
||||
{
|
||||
template <typename... Seqs>
|
||||
__host__ __device__ constexpr auto operator()(Seqs... seqs) const
|
||||
__host__ __device__ constexpr auto operator()(Seqs...) const
|
||||
{
|
||||
// TODO: should use math::logic_or<bool>, after Sequence can take bool
|
||||
return typename sequence_reduce<math::logic_or<bool>, Seqs...>::type{};
|
||||
return typename sequence_reduce<logical_and<index_t>, Seqs...>::type{};
|
||||
}
|
||||
};
|
||||
|
||||
struct lambda_1
|
||||
template <typename T>
|
||||
struct lambda_is_true
|
||||
{
|
||||
__host__ __device__ constexpr auto operator()(const T& x) const
|
||||
{
|
||||
// TODO: remove static_cast once Sequence can take bool as entries
|
||||
return static_cast<bool>(x) == true;
|
||||
}
|
||||
};
|
||||
|
||||
struct lambda_get_linear_dimension_mask_of_single_tranform
|
||||
{
|
||||
// check only one transform at a time
|
||||
template <typename Transform, typename LowDimensionId, typename UpDimensionId>
|
||||
@@ -372,73 +381,73 @@ struct TransformedTensorDescriptor
|
||||
operator()(const Transform& tran, LowDimensionId, UpDimensionId) const
|
||||
{
|
||||
// judge if transformation is linear
|
||||
constexpr bool is_linear_transform = tran.IsLinearTransform();
|
||||
constexpr bool is_linear_transform = Transform::IsLinearTransform();
|
||||
|
||||
// judge if all lower dimension are linear
|
||||
constexpr bool is_all_low_dim_linear = math::reduce_on_sequence(
|
||||
pick_sequence_elements_by_mask(
|
||||
GetLowerTensorDescriptor().GetMaskOfLinearDimensions(), LowDimensionId{}),
|
||||
math::logic_and<bool>{},
|
||||
integral_constant<bool, true>{});
|
||||
constexpr bool are_all_low_dim_linear = sequence_all_of(
|
||||
pick_sequence_elements_by_ids(GetLowerTensorDescriptor().GetLinearDimensionMask(),
|
||||
LowDimensionId{}),
|
||||
lambda_is_true<index_t>{});
|
||||
|
||||
// judge if upper dimenisons are linear
|
||||
constexpr bool is_up_dim_nonlinear = !(is_linear_transform && is_all_low_dim_linear);
|
||||
// create linear mask for upper dimensions
|
||||
constexpr bool are_up_dim_linear = is_linear_transform && are_all_low_dim_linear;
|
||||
|
||||
constexpr auto value_sequence =
|
||||
typename uniform_sequence_gen<tran.GetNumOfUpperDimension(),
|
||||
is_up_dim_nonlinear>::type{};
|
||||
constexpr auto mask_of_up_linear_dims = modifiy_sequence_by_ids(
|
||||
typename uniform_sequence_gen<nDimUp, 0>::type{},
|
||||
typename uniform_sequence_gen<UpDimensionId::Size(), 1>::type{},
|
||||
UpDimensionId{});
|
||||
|
||||
constexpr auto mask_of_up_nonlinear_dims = modifiy_sequence(
|
||||
typename uniform_sequence_gen<nDimUp, 0>::type{}, value_sequence, UpDimensionId{});
|
||||
|
||||
return mask_of_up_nonlinear_dims;
|
||||
};
|
||||
|
||||
__host__ __device__ static constexpr bool GetMaskOfNonLinearDimensions()
|
||||
{
|
||||
// create tuple of linear dimension masks, for all transformations
|
||||
constexpr auto tuple_of_nonlinear_dimension_mask =
|
||||
transform_tuples(lambda_1{}, Transforms{}, LowDimensionIds{}, UpDimensionIds{});
|
||||
|
||||
// reduce tuple of masks into one mask
|
||||
constexpr auto nonlinear_dimension_mask =
|
||||
unpack(lambda_sequence_logic_or{}, tuple_of_nonlinear_dimension_mask);
|
||||
|
||||
return nonlinear_dimension_mask;
|
||||
return mask_of_up_linear_dims;
|
||||
}
|
||||
};
|
||||
|
||||
__host__ __device__ static constexpr bool GetMaskOfLinearDimensions()
|
||||
{
|
||||
return GetMaskOfNonLinearDimensions().Transform(math::logic_not<bool>{});
|
||||
}
|
||||
__host__ __device__ static constexpr auto GetLinearDimensionMask()
|
||||
{
|
||||
// create tuple of linear dimension masks, for all transformations
|
||||
constexpr auto tuple_of_linear_dimension_mask =
|
||||
transform_tuples(lambda_get_linear_dimension_mask_of_single_tranform{},
|
||||
Transforms{},
|
||||
LowDimensionIds{},
|
||||
UpDimensionIds{});
|
||||
|
||||
template <index_t IDim>
|
||||
__host__ __device__ static constexpr bool IsLinearDimension(Number<IDim>)
|
||||
{
|
||||
return GetMaskOfLinearDimensions().At(Number<IDim>{});
|
||||
}
|
||||
// reduce tuple of masks into one mask
|
||||
constexpr auto linear_dimension_mask =
|
||||
unpack(lambda_sequence_logical_and{}, tuple_of_linear_dimension_mask);
|
||||
|
||||
__host__ __device__ static constexpr auto GetLinearDimensions()
|
||||
{
|
||||
constexpr auto linear_dimension_mask = GetMaskOfLienarDimensions();
|
||||
return linear_dimension_mask;
|
||||
}
|
||||
|
||||
return pick_sequence_elements_by_mask(
|
||||
typename arithmetic_sequence_gen<0, nDimUp, 1>::type{}, linear_dimension_mask);
|
||||
}
|
||||
__host__ __device__ static constexpr auto GetNonLinearDimensionMask()
|
||||
{
|
||||
return GetLinearDimensionMask().Transform(logical_not<index_t>{});
|
||||
}
|
||||
|
||||
__host__ __device__ static constexpr auto GetNonLinearDimensions()
|
||||
{
|
||||
constexpr auto nonlinear_dimension_mask =
|
||||
GetMaskOfLienarDimensions().Transform(math::logic_not<index_t>{});
|
||||
template <index_t IDim>
|
||||
__host__ __device__ static constexpr bool IsLinearDimension(Number<IDim>)
|
||||
{
|
||||
return GetLinearDimensionMask().At(Number<IDim>{});
|
||||
}
|
||||
|
||||
return pick_sequence_elements_by_mask(
|
||||
typename arithmetic_sequence_gen<0, nDimUp, 1>::type{}, nonlinear_dimension_mask);
|
||||
}
|
||||
__host__ __device__ static constexpr auto GetLinearDimensions()
|
||||
{
|
||||
constexpr auto linear_dimension_mask = GetLinearDimensionMask();
|
||||
|
||||
__host__ __device__ static constexpr auto GetNonLinearIndependentDimensionGroups()
|
||||
{
|
||||
// not implemented
|
||||
}
|
||||
return pick_sequence_elements_by_mask(
|
||||
typename arithmetic_sequence_gen<0, nDimUp, 1>::type{}, linear_dimension_mask);
|
||||
}
|
||||
|
||||
__host__ __device__ static constexpr auto GetNonLinearDimensions()
|
||||
{
|
||||
constexpr auto nonlinear_dimension_mask = GetNonLinearDimensionMask();
|
||||
|
||||
return pick_sequence_elements_by_mask(
|
||||
typename arithmetic_sequence_gen<0, nDimUp, 1>::type{}, nonlinear_dimension_mask);
|
||||
}
|
||||
|
||||
__host__ __device__ static constexpr auto GetNonLinearIndependentDimensionGroups()
|
||||
{
|
||||
// not implemented
|
||||
}
|
||||
#endif
|
||||
|
||||
__host__ __device__ static constexpr bool
|
||||
@@ -457,9 +466,10 @@ struct TransformedTensorDescriptor
|
||||
return flag;
|
||||
}
|
||||
|
||||
// Whenever this function is called, it will call CalculateLowerIndex() recursively
|
||||
// Whenever this function is called, it will call CalculateLowerIndex() recursively.
|
||||
// If you have created a tensor coordinate already, instead of calling this function,
|
||||
// you should call TransformedTensorCoordinate::IsUpperIndexMappedToValidOffset()
|
||||
// you should call TensorCoordinate::IsUpperIndexMappedToValidOffset() which would
|
||||
// be less expensive.
|
||||
__host__ __device__ static constexpr bool
|
||||
IsUpperIndexMappedToValidOffset(const UpperIndex& idx_up)
|
||||
{
|
||||
|
||||
@@ -25,6 +25,18 @@ struct swallow
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct logical_and
|
||||
{
|
||||
constexpr bool operator()(const T& x, const T& y) const { return x && y; }
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct logical_or
|
||||
{
|
||||
constexpr bool operator()(const T& x, const T& y) const { return x || y; }
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct logical_not
|
||||
{
|
||||
|
||||
@@ -311,7 +311,7 @@ struct sequence_reverse<Sequence<I0, I1>>
|
||||
using type = Sequence<I1, I0>;
|
||||
};
|
||||
|
||||
#if 0
|
||||
#if 1
|
||||
template <typename Reduce, typename Seq, typename... Seqs>
|
||||
struct sequence_reduce
|
||||
{
|
||||
@@ -755,46 +755,82 @@ __host__ __device__ constexpr auto pick_sequence_elements_by_ids(Seq, Sequence<I
|
||||
return Sequence<Seq::At(Number<Is>{})...>{};
|
||||
}
|
||||
|
||||
#if 0
|
||||
#if 1
|
||||
namespace detail {
|
||||
template <typename WorkSeq, typename RemainSeq, typename RemainMask>
|
||||
struct pick_sequence_elements_by_mask_impl
|
||||
{
|
||||
using new_work_seq = typename conditional<RemainMask::Front(),
|
||||
decltype(WorkSeq::PushBack(RemainSeq::Front())),
|
||||
WorkSeq>::type;
|
||||
|
||||
using type =
|
||||
typename pick_sequence_elements_by_mask_impl<new_work_seq,
|
||||
decltype(RemainSeq::PopFront()),
|
||||
decltype(RemainMask::PopFront())>::type;
|
||||
};
|
||||
|
||||
template <typename WorkSeq>
|
||||
struct pick_sequence_elements_by_mask_impl<WorkSeq, Sequence<>, Sequence<>>
|
||||
{
|
||||
using type = WorkSeq;
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
|
||||
template <typename Seq, typename Mask>
|
||||
__host__ __device__ constexpr auto pick_sequence_elements_by_mask(Seq, Mask)
|
||||
{
|
||||
// not implemented
|
||||
static_assert(Seq::Size() == Mask::Size(), "wrong!");
|
||||
|
||||
return typename detail::pick_sequence_elements_by_mask_impl<Sequence<>, Seq, Mask>::type{};
|
||||
}
|
||||
|
||||
namespace detail {
|
||||
template <typename WorkSeq, typename RemainValues, typename RemainIds>
|
||||
struct modify_sequence_elements_by_ids_impl
|
||||
{
|
||||
using new_work_seq = decltype(WorkSeq::Modify(RemainIds::Front(), RemainValues::Front()));
|
||||
|
||||
using type =
|
||||
typename modify_sequence_elements_by_ids_impl<new_work_seq,
|
||||
decltype(RemainValues::PopFront()),
|
||||
decltype(RemainIds::PopFront())>::type;
|
||||
};
|
||||
|
||||
template <typename WorkSeq>
|
||||
struct modify_sequence_elements_by_ids_impl<WorkSeq, Sequence<>, Sequence<>>
|
||||
{
|
||||
using type = WorkSeq;
|
||||
};
|
||||
} // namespace detail
|
||||
|
||||
template <typename Seq, typename Values, typename Ids>
|
||||
__host__ __device__ constexpr auto modify_sequence_elements_by_ids(Seq, Values, Ids)
|
||||
{
|
||||
static_assert(Values::Size() == Ids::Size() && Seq::Size() >= Values::Size(), "wrong!");
|
||||
|
||||
return typename detail::modify_sequence_elements_by_ids_impl<Seq, Values, Ids>::type{};
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename Seq, typename Reduce>
|
||||
struct lambda_reduce_on_sequence
|
||||
{
|
||||
const Reduce& f;
|
||||
index_t& result;
|
||||
|
||||
__host__ __device__ constexpr lambda_reduce_on_sequence(const Reduce& f_, index_t& result_)
|
||||
: f(f_), result(result_)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename IDim>
|
||||
__host__ __device__ constexpr index_t operator()(IDim) const
|
||||
{
|
||||
return result = f(result, Seq::At(IDim{}));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Seq, typename Reduce, index_t Init>
|
||||
__host__ __device__ constexpr index_t
|
||||
reduce_on_sequence(Seq, Reduce f, Number<Init> /*initial_value*/)
|
||||
{
|
||||
index_t result = Init;
|
||||
|
||||
static_for<0, Seq::Size(), 1>{}(lambda_reduce_on_sequence<Seq, Reduce>(f, result));
|
||||
for(index_t i = 0; i < Seq::Size(); ++i)
|
||||
{
|
||||
result = f(result, Seq::At(i));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// TODO: a generic any_of for any container
|
||||
template <typename Seq, typename F>
|
||||
__host__ __device__ constexpr bool sequence_any_of(Seq, F f /*initial_value*/)
|
||||
__host__ __device__ constexpr bool sequence_any_of(Seq, F f)
|
||||
{
|
||||
bool flag = false;
|
||||
|
||||
@@ -808,7 +844,7 @@ __host__ __device__ constexpr bool sequence_any_of(Seq, F f /*initial_value*/)
|
||||
|
||||
// TODO: a generic all_of for any container
|
||||
template <typename Seq, typename F>
|
||||
__host__ __device__ constexpr bool sequence_all_of(Seq, F f /*initial_value*/)
|
||||
__host__ __device__ constexpr bool sequence_all_of(Seq, F f)
|
||||
{
|
||||
bool flag = true;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user