diff --git a/composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp b/composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
index ebb970f481..b9ca26c879 100644
--- a/composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
+++ b/composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
@@ -33,13 +33,11 @@ struct DynamicTensorDescriptor
     __host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
     {
-        constexpr auto all_low_dim_ids =
-            unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
-                   LowerDimensionIdss{});
+        constexpr auto all_low_dim_ids = unpack(
+            [](auto&&... xs) constexpr { return merge_sequences(xs...); }, LowerDimensionIdss{});
 
-        constexpr auto all_up_dim_ids =
-            unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
-                   UpperDimensionIdss{});
+        constexpr auto all_up_dim_ids = unpack(
+            [](auto&&... xs) constexpr { return merge_sequences(xs...); }, UpperDimensionIdss{});
 
         constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);
@@ -347,22 +345,22 @@ transform_dynamic_tensor_descriptor(const OldTensorDescriptor& old_tensor_desc,
     constexpr auto up_dim_numbers_scan = merge_sequences(
         Sequence<0>{}, inclusive_scan_sequence(up_dim_numbers, math::plus{}, Number<0>{}));
 
-    constexpr auto up_dim_hidden_idss =
-        generate_tuple([ old_hidden_dim_number, up_dim_numbers_scan ](auto i) constexpr {
+    constexpr auto up_dim_hidden_idss = generate_tuple(
+        [ old_hidden_dim_number, up_dim_numbers_scan ](auto i) constexpr {
             return typename arithmetic_sequence_gen::type{};
         },
-            Number{});
+        Number{});
 
     // new visible dimension's hidden ids
-    constexpr auto unordered_new_visible_dim_hidden_ids =
-        unpack([](auto... xs) constexpr { return merge_sequences(xs...); }, up_dim_hidden_idss);
+    constexpr auto unordered_new_visible_dim_hidden_ids = unpack(
+        [](auto... xs) constexpr { return merge_sequences(xs...); }, up_dim_hidden_idss);
 
-    constexpr auto new_visible_dim_unordered2ordered =
-        unpack([](auto... xs) constexpr { return merge_sequences(xs...); },
-               NewUpperDimensionNewVisibleIdss{});
+    constexpr auto new_visible_dim_unordered2ordered = unpack(
+        [](auto... xs) constexpr { return merge_sequences(xs...); },
+        NewUpperDimensionNewVisibleIdss{});
 
     constexpr auto new_visible_dim_hidden_ids =
         unordered_new_visible_dim_hidden_ids.ReorderGivenOld2New(new_visible_dim_unordered2ordered);
diff --git a/composable_kernel/include/tensor_description/tensor_adaptor.hpp b/composable_kernel/include/tensor_description/tensor_adaptor.hpp
index 5e8f898f26..6affe6141f 100644
--- a/composable_kernel/include/tensor_description/tensor_adaptor.hpp
+++ b/composable_kernel/include/tensor_description/tensor_adaptor.hpp
@@ -106,13 +106,13 @@ struct TensorAdaptor
     __host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
    {
-        constexpr auto all_low_dim_ids =
-            unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
-                   LowerDimensionHiddenIdss{});
+        constexpr auto all_low_dim_ids = unpack(
+            [](auto&&... xs) constexpr { return merge_sequences(xs...); },
+            LowerDimensionHiddenIdss{});
 
-        constexpr auto all_up_dim_ids =
-            unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
-                   UpperDimensionHiddenIdss{});
+        constexpr auto all_up_dim_ids = unpack(
+            [](auto&&... xs) constexpr { return merge_sequences(xs...); },
+            UpperDimensionHiddenIdss{});
 
         constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);
@@ -418,13 +418,11 @@ __host__ __device__ constexpr auto make_single_stage_tensor_adaptor(const Transf
                   "wrong!");
 
     // sanity check on LowerDimensionOldTopIdss and UpperDimensionNewTopIdss
-    constexpr auto all_low_dim_old_top_ids =
-        unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
-               LowerDimensionOldTopIdss{});
+    constexpr auto all_low_dim_old_top_ids = unpack(
+        [](auto&&... xs) constexpr { return merge_sequences(xs...); }, LowerDimensionOldTopIdss{});
 
-    constexpr auto all_up_dim_new_top_ids =
-        unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
-               UpperDimensionNewTopIdss{});
+    constexpr auto all_up_dim_new_top_ids = unpack(
+        [](auto&&... xs) constexpr { return merge_sequences(xs...); }, UpperDimensionNewTopIdss{});
 
     static_assert(is_valid_sequence_map::value &&
                       is_valid_sequence_map::value,
diff --git a/composable_kernel/include/tensor_operation/blockwise_gemm_dlops_v3.hpp b/composable_kernel/include/tensor_operation/blockwise_gemm_dlops_v3.hpp
index e624ad0b4d..074d519b76 100644
--- a/composable_kernel/include/tensor_operation/blockwise_gemm_dlops_v3.hpp
+++ b/composable_kernel/include/tensor_operation/blockwise_gemm_dlops_v3.hpp
@@ -152,7 +152,6 @@ struct BlockwiseGemmDlops_km_kn_m0m1n0n1_v3
 
         static_for<0, EPerBlock, EPerThreadLoop>{}([&](auto e_begin) {
             static_for<0, KPerThread, KPerThreadSubC>{}([&](auto k_begin) {
-
                 a_thread_copy_.Run(a_block_mtx,
                                    make_tuple(e_begin, k_begin),
                                    a_block_buf,
diff --git a/composable_kernel/include/tensor_operation/threadwise_contraction_dlops.hpp b/composable_kernel/include/tensor_operation/threadwise_contraction_dlops.hpp
index 0440bc0312..7e7bb9c8c3 100644
--- a/composable_kernel/include/tensor_operation/threadwise_contraction_dlops.hpp
+++ b/composable_kernel/include/tensor_operation/threadwise_contraction_dlops.hpp
@@ -87,7 +87,6 @@ struct ThreadwiseGemmDlops_km0m1_kn0n1_m0m1n0n1
         static_for<0, TM1, 1>{}([&](auto tm1) {
             static_for<0, TN0, 1>{}([&](auto tn0) {
                 static_for<0, TN1, 1>{}([&](auto tn1) {
-
                     constexpr index_t a_offset = AThreadDesc_TK0_TM0_TM1_TK1{}.CalculateOffset(
                         a_origin_idx + make_multi_index(tk, tm0, tm1));
@@ -192,7 +191,6 @@ struct ThreadwiseContractionDlops_A_TK0_TM0_TM1_TK1_B_TK0_TN0_TN1_TK1_C_TM0_TM1_
         static_for<0, TM1, 1>{}([&](auto tm1) {
             static_for<0, TN0, 1>{}([&](auto tn0) {
                 static_for<0, TN1, 1>{}([&](auto tn1) {
-
                     vector_type a_vec;
                     vector_type b_vec;
diff --git a/composable_kernel/include/tensor_operation/threadwise_gemm_dlops_v3.hpp b/composable_kernel/include/tensor_operation/threadwise_gemm_dlops_v3.hpp
index f31150c2cf..153d512df7 100644
--- a/composable_kernel/include/tensor_operation/threadwise_gemm_dlops_v3.hpp
+++ b/composable_kernel/include/tensor_operation/threadwise_gemm_dlops_v3.hpp
@@ -136,7 +136,6 @@ struct ThreadwiseGemmDlops_km_kn_mn_v3
     {
         static_for<0, H, 1>{}([&](auto h) {
             static_for<0, W, 1>{}([&](auto w) {
-
                 constexpr index_t b_offset =
                     BDesc{}.CalculateOffset(b_origin_idx + make_tuple(e, 0, h, w));
diff --git a/composable_kernel/include/utility/data_type_enum.hpp b/composable_kernel/include/utility/data_type_enum.hpp
index fba380a5fc..43499605dc 100644
--- a/composable_kernel/include/utility/data_type_enum.hpp
+++ b/composable_kernel/include/utility/data_type_enum.hpp
@@ -4,7 +4,8 @@
 namespace ck {
 
 // this enumerate should be synchronized with include/miopen.h
-typedef enum {
+typedef enum
+{
     Half  = 0,
     Float = 1,
     Int32 = 2,
diff --git a/external/half/include/half.hpp b/external/half/include/half.hpp
index f15e8d00dd..b698aac39f 100644
--- a/external/half/include/half.hpp
+++ b/external/half/include/half.hpp
@@ -2399,11 +2399,11 @@ unsigned int erf(unsigned int arg)
 template unsigned int gamma(unsigned int arg)
 {
-    /* static const double p[] ={ 2.50662827563479526904, 225.525584619175212544, -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837, 0.0114684895434781459556 };
-    double t = arg + 4.65, s = p[0];
-    for(unsigned int i=0; i<5; ++i)
-    s += p[i+1] / (arg+i);
-    return std::log(s) + (arg-0.5)*std::log(t) - t; */
+    /* static const double p[] ={ 2.50662827563479526904, 225.525584619175212544,
+       -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837,
+       0.0114684895434781459556 }; double t = arg + 4.65, s = p[0]; for(unsigned int i=0; i<5; ++i)
+       s += p[i+1] / (arg+i);
+       return std::log(s) + (arg-0.5)*std::log(t) - t; */
     static const f31 pi(0xC90FDAA2, 1), lbe(0xB8AA3B29, 0);
     unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
@@ -2506,7 +2506,7 @@ unsigned int gamma(unsigned int arg)
 
 template struct half_caster;
-}
+} // namespace detail
 
 /// Half-precision floating-point type.
 /// This class implements an IEEE-conformant half-precision floating-point type with the usual
diff --git a/host/host_tensor/include/host_tensor.hpp b/host/host_tensor/include/host_tensor.hpp
index d4998d511f..70778a4a94 100644
--- a/host/host_tensor/include/host_tensor.hpp
+++ b/host/host_tensor/include/host_tensor.hpp
@@ -39,7 +39,8 @@ std::ostream& LogRangeAsType(std::ostream& os, Range&& range, std::string delim)
     return os;
 }
 
-typedef enum {
+typedef enum
+{
     Half  = 0,
     Float = 1,
 } DataType_t;
@@ -227,27 +228,23 @@ struct Tensor
     {
         switch(mDesc.GetNumOfDimension())
         {
-        case 1:
-        {
+        case 1: {
             auto f = [&](auto i) { (*this)(i) = g(i); };
             make_ParallelTensorFunctor(f, mDesc.GetLengths()[0])(num_thread);
             break;
         }
-        case 2:
-        {
+        case 2: {
             auto f = [&](auto i0, auto i1) { (*this)(i0, i1) = g(i0, i1); };
             make_ParallelTensorFunctor(f, mDesc.GetLengths()[0], mDesc.GetLengths()[1])(num_thread);
             break;
         }
-        case 3:
-        {
+        case 3: {
             auto f = [&](auto i0, auto i1, auto i2) { (*this)(i0, i1, i2) = g(i0, i1, i2); };
             make_ParallelTensorFunctor(
                 f, mDesc.GetLengths()[0], mDesc.GetLengths()[1], mDesc.GetLengths()[2])(num_thread);
             break;
         }
-        case 4:
-        {
+        case 4: {
             auto f = [&](auto i0, auto i1, auto i2, auto i3) {
                 (*this)(i0, i1, i2, i3) = g(i0, i1, i2, i3);
             };
diff --git a/host/online_compilation/hip_utility/kernel_cache.cpp b/host/online_compilation/hip_utility/kernel_cache.cpp
index 8ecf54b0c6..fff57c194e 100644
--- a/host/online_compilation/hip_utility/kernel_cache.cpp
+++ b/host/online_compilation/hip_utility/kernel_cache.cpp
@@ -145,9 +145,7 @@ void KernelCache::ClearKernels(const std::string& algorithm, const std::string&
     }
     const std::pair key = std::make_pair(algorithm, network_config);
     auto&& v = this->kernel_map[key];
-    if(!v.empty())
-    {
-    }
+    if(!v.empty()) {}
     v.clear();
 }
diff --git a/host/online_compilation/hip_utility/logger.cpp b/host/online_compilation/hip_utility/logger.cpp
index 15e578cb77..e8d31562a5 100644
--- a/host/online_compilation/hip_utility/logger.cpp
+++ b/host/online_compilation/hip_utility/logger.cpp
@@ -40,4 +40,4 @@ ostream& fdt_log(LogLevel level, const char* header, const char* content)
 
 ostream& fdt_log() { return (cerr); };
 void fdt_log_flush() { cerr << endl; }
-};
+}; // namespace olCompile
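
Note: every hunk above is a whitespace-only change, consistent with clang-format output. As a rough sketch, a .clang-format fragment like the one below would reproduce the brace, namespace-comment, and comment-reflow behavior seen in this patch; the option values (including the 100-column limit) are inferred from the diff itself, not taken from the repository's actual .clang-format, so treat them as assumptions:

# Assumed .clang-format fragment; not the repository's actual configuration.
ColumnLimit: 100                        # long unpack(...) calls rewrap at 100 columns
FixNamespaceComments: true              # emits "} // namespace detail" and "}; // namespace olCompile"
ReflowComments: true                    # rewraps the over-long block comment in half.hpp
KeepEmptyLinesAtTheStartOfBlocks: false # drops blank lines right after "([&](auto tn1) {"
AllowShortBlocksOnASingleLine: true     # keeps "if(!v.empty()) {}" on one line
BreakBeforeBraces: Custom
BraceWrapping:
  AfterEnum: true                       # "typedef enum" with "{" on its own line
  AfterCaseLabel: false                 # "case 1: {" keeps the brace on the label line

With such a file at the repository root, running, for example, `clang-format -i host/host_tensor/include/host_tensor.hpp` applies the style in place.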