mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-11 16:59:10 +00:00
Merge pull request #3 from ROCmSoftwarePlatform/format
Update to clang-format-10
This commit is contained in:
@@ -33,13 +33,11 @@ struct DynamicTensorDescriptor
|
|||||||
|
|
||||||
__host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
|
__host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
|
||||||
{
|
{
|
||||||
constexpr auto all_low_dim_ids =
|
constexpr auto all_low_dim_ids = unpack(
|
||||||
unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
|
[](auto&&... xs) constexpr { return merge_sequences(xs...); }, LowerDimensionIdss{});
|
||||||
LowerDimensionIdss{});
|
|
||||||
|
|
||||||
constexpr auto all_up_dim_ids =
|
constexpr auto all_up_dim_ids = unpack(
|
||||||
unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
|
[](auto&&... xs) constexpr { return merge_sequences(xs...); }, UpperDimensionIdss{});
|
||||||
UpperDimensionIdss{});
|
|
||||||
|
|
||||||
constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);
|
constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);
|
||||||
|
|
||||||
@@ -347,22 +345,22 @@ transform_dynamic_tensor_descriptor(const OldTensorDescriptor& old_tensor_desc,
|
|||||||
constexpr auto up_dim_numbers_scan = merge_sequences(
|
constexpr auto up_dim_numbers_scan = merge_sequences(
|
||||||
Sequence<0>{}, inclusive_scan_sequence(up_dim_numbers, math::plus<index_t>{}, Number<0>{}));
|
Sequence<0>{}, inclusive_scan_sequence(up_dim_numbers, math::plus<index_t>{}, Number<0>{}));
|
||||||
|
|
||||||
constexpr auto up_dim_hidden_idss =
|
constexpr auto up_dim_hidden_idss = generate_tuple(
|
||||||
generate_tuple([ old_hidden_dim_number, up_dim_numbers_scan ](auto i) constexpr {
|
[ old_hidden_dim_number, up_dim_numbers_scan ](auto i) constexpr {
|
||||||
return
|
return
|
||||||
typename arithmetic_sequence_gen<old_hidden_dim_number + up_dim_numbers_scan[i],
|
typename arithmetic_sequence_gen<old_hidden_dim_number + up_dim_numbers_scan[i],
|
||||||
old_hidden_dim_number + up_dim_numbers_scan[i + 1],
|
old_hidden_dim_number + up_dim_numbers_scan[i + 1],
|
||||||
1>::type{};
|
1>::type{};
|
||||||
},
|
},
|
||||||
Number<num_new_transform>{});
|
Number<num_new_transform>{});
|
||||||
|
|
||||||
// new visible dimension's hidden ids
|
// new visible dimension's hidden ids
|
||||||
constexpr auto unordered_new_visible_dim_hidden_ids =
|
constexpr auto unordered_new_visible_dim_hidden_ids = unpack(
|
||||||
unpack([](auto... xs) constexpr { return merge_sequences(xs...); }, up_dim_hidden_idss);
|
[](auto... xs) constexpr { return merge_sequences(xs...); }, up_dim_hidden_idss);
|
||||||
|
|
||||||
constexpr auto new_visible_dim_unordered2ordered =
|
constexpr auto new_visible_dim_unordered2ordered = unpack(
|
||||||
unpack([](auto... xs) constexpr { return merge_sequences(xs...); },
|
[](auto... xs) constexpr { return merge_sequences(xs...); },
|
||||||
NewUpperDimensionNewVisibleIdss{});
|
NewUpperDimensionNewVisibleIdss{});
|
||||||
|
|
||||||
constexpr auto new_visible_dim_hidden_ids =
|
constexpr auto new_visible_dim_hidden_ids =
|
||||||
unordered_new_visible_dim_hidden_ids.ReorderGivenOld2New(new_visible_dim_unordered2ordered);
|
unordered_new_visible_dim_hidden_ids.ReorderGivenOld2New(new_visible_dim_unordered2ordered);
|
||||||
|
|||||||
@@ -106,13 +106,13 @@ struct TensorAdaptor
|
|||||||
|
|
||||||
__host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
|
__host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
|
||||||
{
|
{
|
||||||
constexpr auto all_low_dim_ids =
|
constexpr auto all_low_dim_ids = unpack(
|
||||||
unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
|
[](auto&&... xs) constexpr { return merge_sequences(xs...); },
|
||||||
LowerDimensionHiddenIdss{});
|
LowerDimensionHiddenIdss{});
|
||||||
|
|
||||||
constexpr auto all_up_dim_ids =
|
constexpr auto all_up_dim_ids = unpack(
|
||||||
unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
|
[](auto&&... xs) constexpr { return merge_sequences(xs...); },
|
||||||
UpperDimensionHiddenIdss{});
|
UpperDimensionHiddenIdss{});
|
||||||
|
|
||||||
constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);
|
constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);
|
||||||
|
|
||||||
@@ -418,13 +418,11 @@ __host__ __device__ constexpr auto make_single_stage_tensor_adaptor(const Transf
|
|||||||
"wrong!");
|
"wrong!");
|
||||||
|
|
||||||
// sanity check on LowerDimensionOldTopIdss and UpperDimensionNewTopIdss
|
// sanity check on LowerDimensionOldTopIdss and UpperDimensionNewTopIdss
|
||||||
constexpr auto all_low_dim_old_top_ids =
|
constexpr auto all_low_dim_old_top_ids = unpack(
|
||||||
unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
|
[](auto&&... xs) constexpr { return merge_sequences(xs...); }, LowerDimensionOldTopIdss{});
|
||||||
LowerDimensionOldTopIdss{});
|
|
||||||
|
|
||||||
constexpr auto all_up_dim_new_top_ids =
|
constexpr auto all_up_dim_new_top_ids = unpack(
|
||||||
unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
|
[](auto&&... xs) constexpr { return merge_sequences(xs...); }, UpperDimensionNewTopIdss{});
|
||||||
UpperDimensionNewTopIdss{});
|
|
||||||
|
|
||||||
static_assert(is_valid_sequence_map<decltype(all_low_dim_old_top_ids)>::value &&
|
static_assert(is_valid_sequence_map<decltype(all_low_dim_old_top_ids)>::value &&
|
||||||
is_valid_sequence_map<decltype(all_up_dim_new_top_ids)>::value,
|
is_valid_sequence_map<decltype(all_up_dim_new_top_ids)>::value,
|
||||||
|
|||||||
@@ -152,7 +152,6 @@ struct BlockwiseGemmDlops_km_kn_m0m1n0n1_v3
|
|||||||
|
|
||||||
static_for<0, EPerBlock, EPerThreadLoop>{}([&](auto e_begin) {
|
static_for<0, EPerBlock, EPerThreadLoop>{}([&](auto e_begin) {
|
||||||
static_for<0, KPerThread, KPerThreadSubC>{}([&](auto k_begin) {
|
static_for<0, KPerThread, KPerThreadSubC>{}([&](auto k_begin) {
|
||||||
|
|
||||||
a_thread_copy_.Run(a_block_mtx,
|
a_thread_copy_.Run(a_block_mtx,
|
||||||
make_tuple(e_begin, k_begin),
|
make_tuple(e_begin, k_begin),
|
||||||
a_block_buf,
|
a_block_buf,
|
||||||
|
|||||||
@@ -87,7 +87,6 @@ struct ThreadwiseGemmDlops_km0m1_kn0n1_m0m1n0n1
|
|||||||
static_for<0, TM1, 1>{}([&](auto tm1) {
|
static_for<0, TM1, 1>{}([&](auto tm1) {
|
||||||
static_for<0, TN0, 1>{}([&](auto tn0) {
|
static_for<0, TN0, 1>{}([&](auto tn0) {
|
||||||
static_for<0, TN1, 1>{}([&](auto tn1) {
|
static_for<0, TN1, 1>{}([&](auto tn1) {
|
||||||
|
|
||||||
constexpr index_t a_offset =
|
constexpr index_t a_offset =
|
||||||
AThreadDesc_TK0_TM0_TM1_TK1{}.CalculateOffset(
|
AThreadDesc_TK0_TM0_TM1_TK1{}.CalculateOffset(
|
||||||
a_origin_idx + make_multi_index(tk, tm0, tm1));
|
a_origin_idx + make_multi_index(tk, tm0, tm1));
|
||||||
@@ -192,7 +191,6 @@ struct ThreadwiseContractionDlops_A_TK0_TM0_TM1_TK1_B_TK0_TN0_TN1_TK1_C_TM0_TM1_
|
|||||||
static_for<0, TM1, 1>{}([&](auto tm1) {
|
static_for<0, TM1, 1>{}([&](auto tm1) {
|
||||||
static_for<0, TN0, 1>{}([&](auto tn0) {
|
static_for<0, TN0, 1>{}([&](auto tn0) {
|
||||||
static_for<0, TN1, 1>{}([&](auto tn1) {
|
static_for<0, TN1, 1>{}([&](auto tn1) {
|
||||||
|
|
||||||
vector_type<FloatA, TK1> a_vec;
|
vector_type<FloatA, TK1> a_vec;
|
||||||
vector_type<FloatB, TK1> b_vec;
|
vector_type<FloatB, TK1> b_vec;
|
||||||
|
|
||||||
|
|||||||
@@ -136,7 +136,6 @@ struct ThreadwiseGemmDlops_km_kn_mn_v3
|
|||||||
{
|
{
|
||||||
static_for<0, H, 1>{}([&](auto h) {
|
static_for<0, H, 1>{}([&](auto h) {
|
||||||
static_for<0, W, 1>{}([&](auto w) {
|
static_for<0, W, 1>{}([&](auto w) {
|
||||||
|
|
||||||
constexpr index_t b_offset =
|
constexpr index_t b_offset =
|
||||||
BDesc{}.CalculateOffset(b_origin_idx + make_tuple(e, 0, h, w));
|
BDesc{}.CalculateOffset(b_origin_idx + make_tuple(e, 0, h, w));
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,8 @@
|
|||||||
namespace ck {
|
namespace ck {
|
||||||
|
|
||||||
// this enumerate should be synchronized with include/miopen.h
|
// this enumerate should be synchronized with include/miopen.h
|
||||||
typedef enum {
|
typedef enum
|
||||||
|
{
|
||||||
Half = 0,
|
Half = 0,
|
||||||
Float = 1,
|
Float = 1,
|
||||||
Int32 = 2,
|
Int32 = 2,
|
||||||
|
|||||||
12
external/half/include/half.hpp
vendored
12
external/half/include/half.hpp
vendored
@@ -2399,11 +2399,11 @@ unsigned int erf(unsigned int arg)
|
|||||||
template <std::float_round_style R, bool L>
|
template <std::float_round_style R, bool L>
|
||||||
unsigned int gamma(unsigned int arg)
|
unsigned int gamma(unsigned int arg)
|
||||||
{
|
{
|
||||||
/* static const double p[] ={ 2.50662827563479526904, 225.525584619175212544, -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837, 0.0114684895434781459556 };
|
/* static const double p[] ={ 2.50662827563479526904, 225.525584619175212544,
|
||||||
double t = arg + 4.65, s = p[0];
|
-268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837,
|
||||||
for(unsigned int i=0; i<5; ++i)
|
0.0114684895434781459556 }; double t = arg + 4.65, s = p[0]; for(unsigned int i=0; i<5; ++i)
|
||||||
s += p[i+1] / (arg+i);
|
s += p[i+1] / (arg+i);
|
||||||
return std::log(s) + (arg-0.5)*std::log(t) - t;
|
return std::log(s) + (arg-0.5)*std::log(t) - t;
|
||||||
*/ static const f31 pi(0xC90FDAA2, 1),
|
*/ static const f31 pi(0xC90FDAA2, 1),
|
||||||
lbe(0xB8AA3B29, 0);
|
lbe(0xB8AA3B29, 0);
|
||||||
unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
|
unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
|
||||||
@@ -2506,7 +2506,7 @@ unsigned int gamma(unsigned int arg)
|
|||||||
|
|
||||||
template <typename, typename, std::float_round_style>
|
template <typename, typename, std::float_round_style>
|
||||||
struct half_caster;
|
struct half_caster;
|
||||||
}
|
} // namespace detail
|
||||||
|
|
||||||
/// Half-precision floating-point type.
|
/// Half-precision floating-point type.
|
||||||
/// This class implements an IEEE-conformant half-precision floating-point type with the usual
|
/// This class implements an IEEE-conformant half-precision floating-point type with the usual
|
||||||
|
|||||||
@@ -39,7 +39,8 @@ std::ostream& LogRangeAsType(std::ostream& os, Range&& range, std::string delim)
|
|||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef enum {
|
typedef enum
|
||||||
|
{
|
||||||
Half = 0,
|
Half = 0,
|
||||||
Float = 1,
|
Float = 1,
|
||||||
} DataType_t;
|
} DataType_t;
|
||||||
@@ -227,27 +228,23 @@ struct Tensor
|
|||||||
{
|
{
|
||||||
switch(mDesc.GetNumOfDimension())
|
switch(mDesc.GetNumOfDimension())
|
||||||
{
|
{
|
||||||
case 1:
|
case 1: {
|
||||||
{
|
|
||||||
auto f = [&](auto i) { (*this)(i) = g(i); };
|
auto f = [&](auto i) { (*this)(i) = g(i); };
|
||||||
make_ParallelTensorFunctor(f, mDesc.GetLengths()[0])(num_thread);
|
make_ParallelTensorFunctor(f, mDesc.GetLengths()[0])(num_thread);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 2:
|
case 2: {
|
||||||
{
|
|
||||||
auto f = [&](auto i0, auto i1) { (*this)(i0, i1) = g(i0, i1); };
|
auto f = [&](auto i0, auto i1) { (*this)(i0, i1) = g(i0, i1); };
|
||||||
make_ParallelTensorFunctor(f, mDesc.GetLengths()[0], mDesc.GetLengths()[1])(num_thread);
|
make_ParallelTensorFunctor(f, mDesc.GetLengths()[0], mDesc.GetLengths()[1])(num_thread);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 3:
|
case 3: {
|
||||||
{
|
|
||||||
auto f = [&](auto i0, auto i1, auto i2) { (*this)(i0, i1, i2) = g(i0, i1, i2); };
|
auto f = [&](auto i0, auto i1, auto i2) { (*this)(i0, i1, i2) = g(i0, i1, i2); };
|
||||||
make_ParallelTensorFunctor(
|
make_ParallelTensorFunctor(
|
||||||
f, mDesc.GetLengths()[0], mDesc.GetLengths()[1], mDesc.GetLengths()[2])(num_thread);
|
f, mDesc.GetLengths()[0], mDesc.GetLengths()[1], mDesc.GetLengths()[2])(num_thread);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 4:
|
case 4: {
|
||||||
{
|
|
||||||
auto f = [&](auto i0, auto i1, auto i2, auto i3) {
|
auto f = [&](auto i0, auto i1, auto i2, auto i3) {
|
||||||
(*this)(i0, i1, i2, i3) = g(i0, i1, i2, i3);
|
(*this)(i0, i1, i2, i3) = g(i0, i1, i2, i3);
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -145,9 +145,7 @@ void KernelCache::ClearKernels(const std::string& algorithm, const std::string&
|
|||||||
}
|
}
|
||||||
const std::pair<std::string, std::string> key = std::make_pair(algorithm, network_config);
|
const std::pair<std::string, std::string> key = std::make_pair(algorithm, network_config);
|
||||||
auto&& v = this->kernel_map[key];
|
auto&& v = this->kernel_map[key];
|
||||||
if(!v.empty())
|
if(!v.empty()) {}
|
||||||
{
|
|
||||||
}
|
|
||||||
v.clear();
|
v.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -40,4 +40,4 @@ ostream& fdt_log(LogLevel level, const char* header, const char* content)
|
|||||||
ostream& fdt_log() { return (cerr); };
|
ostream& fdt_log() { return (cerr); };
|
||||||
|
|
||||||
void fdt_log_flush() { cerr << endl; }
|
void fdt_log_flush() { cerr << endl; }
|
||||||
};
|
}; // namespace olCompile
|
||||||
|
|||||||
Reference in New Issue
Block a user